diff --git a/.circleci/config.yml b/.circleci/config.yml index e0679e816..6a73858cd 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -16,60 +16,66 @@ jobs: # To see the list of pre-built images that CircleCI provides for most common languages see # https://circleci.com/docs/2.0/circleci-images/ docker: - - image: circleci/python:3.6.9 + - image: cimg/python:3.13 steps: - # Machine Setup - # If you break your build into multiple jobs with workflows, you will probably want to do the parts of this that are relevant in each - # The following `checkout` command checks out your code to your working directory. In 1.0 we did this implicitly. In 2.0 you can choose where in the course of a job your code should be checked out. - - checkout - # Prepare for artifact and test results collection equivalent to how it was done on 1.0. - # In many cases you can simplify this from what is generated here. - # 'See docs on artifact collection here https://circleci.com/docs/2.0/artifacts/' - - run: mkdir -p $CIRCLE_ARTIFACTS $CIRCLE_TEST_REPORTS - # This is based on your 1.0 configuration file or project settings - - run: - working_directory: ~/rocky/python-uncompyle6 - command: pip install --user virtualenv && pip install --user nose && pip install --user pep8 - # Dependencies - # This would typically go in either a build or a build-and-test job when using workflows - # Restore the dependency cache - - restore_cache: - keys: - - v2-dependencies-{{ .Branch }}- - # fallback to using the latest cache if no exact match is found - - v2-dependencies- + # Machine Setup + # If you break your build into multiple jobs with workflows, you will probably want to do the parts of this that are relevant in each + # The following `checkout` command checks out your code to your working directory. In 1.0 we did this implicitly. In 2.0 you can choose where in the course of a job your code should be checked out. 
+ - checkout + # Prepare for artifact and test results collection equivalent to how it was done on 1.0. + # In many cases you can simplify this from what is generated here. + # 'See docs on artifact collection here https://circleci.com/docs/2.0/artifacts/' + - run: mkdir -p $CIRCLE_ARTIFACTS $CIRCLE_TEST_REPORTS + # This is based on your 1.0 configuration file or project settings + - run: + working_directory: ~/rocky/python-uncompyle6 + command: pip install --user virtualenv && pip install --user nose && pip install + --user pep8 + # Dependencies + # This would typically go in either a build or a build-and-test job when using workflows + # Restore the dependency cache + - restore_cache: + keys: + - v2-dependencies-{{ .Branch }}- + # fallback to using the latest cache if no exact match is found + - v2-dependencies- - - run: - command: | # Use pip to install dependengcies - pip install --user --upgrade setuptools - pip install --user -e . - pip install --user -r requirements-dev.txt + - run: + command: | # Use pip to install dependencies + # pip install --user --upgrade setuptools + pip install --user -e git+https://github.com/rocky/python-xdis.git#egg=xdis + pip install --user -e . 
+ # Not sure why "pip install -e" doesn't work above + # pip install click spark-parser xdis + pip install --user -r requirements-dev.txt - # Save dependency cache - - save_cache: - key: v2-dependencies-{{ .Branch }}-{{ epoch }} - paths: - # This is a broad list of cache paths to include many possible development environments - # You can probably delete some of these entries - - vendor/bundle - - ~/virtualenvs - - ~/.m2 - - ~/.ivy2 - - ~/.bundle - - ~/.cache/bower + # Save dependency cache + - save_cache: + key: v2-dependencies-{{ .Branch }}-{{ epoch }} + paths: + # This is a broad list of cache paths to include many possible development environments + # You can probably delete some of these entries + - vendor/bundle + - ~/virtualenvs + - ~/.m2 + - ~/.ivy2 + - ~/.bundle + - ~/.cache/bower - # Test - # This would typically be a build job when using workflows, possibly combined with build - # This is based on your 1.0 configuration file or project settings - - run: sudo python ./setup.py develop && make check-3.6 - - run: cd ./test/stdlib && bash ./runtests.sh 'test_[p-z]*.py' - # Teardown - # If you break your build into multiple jobs with workflows, you will probably want to do the parts of this that are relevant in each - # Save test results - - store_test_results: - path: /tmp/circleci-test-results - # Save artifacts - - store_artifacts: - path: /tmp/circleci-artifacts - - store_artifacts: - path: /tmp/circleci-test-results + # Test + # This would typically be a build job when using workflows, possibly combined with build + # This is based on your 1.0 configuration file or project settings + - run: pip install -e . 
&& make check-3.6 + - run: cd ./test/stdlib && bash ./runtests.sh 'test_[p-z]*.py' + # Teardown + # If you break your build into multiple jobs with workflows, you will probably want to do the parts of this that are relevant in each + # Save test results + - store_test_results: + path: /tmp/circleci-test-results + # Save artifacts + - store_artifacts: + path: /tmp/circleci-artifacts + - store_artifacts: + path: /tmp/circleci-test-results + # The resource_class feature allows configuring CPU and RAM resources for each job. Different resource classes are available for different executors. https://circleci.com/docs/2.0/configuration-reference/#resourceclass + resource_class: large diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index dc3041265..9e206c3cd 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -6,7 +6,7 @@ open_collective: # Replace with a single Open Collective username ko_fi: # Replace with a single Ko-fi username tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry -liberapay: # Replace with a single Liberapay username +liberapay: rocky issuehunt: # Replace with a single IssueHunt username otechie: # Replace with a single Otechie username custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md index cec076b49..0c91c7272 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.md +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -5,26 +5,32 @@ about: Tell us about uncompyle6 bugs --- + ## How to Reproduce - @@ -96,8 +102,9 @@ If this is too long, then try narrowing the problem to something short. 
Please modify for your setup - Uncompyle6 version: output from `uncompyle6 --version` or `pip show uncompyle6` +- xdis version: output from `pydisasm --version` or `pip show xdis` - Python version for the version of Python the byte-compiled the file: `python -c "import sys; print(sys.version)"` where `python` is the correct CPython or PyPy binary. -- OS and Version: [e.g. Ubuntu bionic] +- OS and Version: [e.g., Ubuntu bionic] --> @@ -107,7 +114,11 @@ Please modify for your setup ## Priority - + ## Additional Context diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 000000000..4abc80f05 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1 @@ +blank_issues_enabled: false diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md index 28f64445c..68be2e4e5 100644 --- a/.github/ISSUE_TEMPLATE/feature-request.md +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -14,6 +14,14 @@ include same input and output. 
--> +## Priority + + + ## Tests -**Table of Contents** - -- [Get latest sources:](#get-latest-sources) -- [Change version in uncompyle6/version.py:](#change-version-in-uncompyle6versionpy) -- [Update ChangeLog:](#update-changelog) -- [Update NEWS.md from ChangeLog:](#update-newsmd-from-changelog) -- [Make sure pyenv is running and check newer versions](#make-sure-pyenv-is-running-and-check-newer-versions) -- [Switch to python-2.4, sync that up and build that first since it creates a tarball which we don't want.](#switch-to-python-24-sync-that-up-and-build-that-first-since-it-creates-a-tarball-which-we-dont-want) -- [Check against older versions](#check-against-older-versions) -- [Make packages and tag](#make-packages-and-tag) -- [Check package on github](#check-package-on-github) -- [Release on Github](#release-on-github) -- [Get onto PyPI](#get-onto-pypi) -- [Update tags:](#update-tags) - - -# Get latest sources: - - git pull - -# Change version in uncompyle6/version.py: - - $ emacs uncompyle6/version.py - $ source uncompyle6/version.py - $ echo $VERSION - $ git commit -m"Get ready for release $VERSION" . - -# Update ChangeLog: - - $ make ChangeLog - -# Update NEWS.md from ChangeLog: - - $ emacs NEWS.md - $ make check - $ git commit --amend . - $ git push # get CI testing going early - -# Make sure pyenv is running and check newer versions - - $ admin-tools/check-newer-versions.sh - -# Switch to python-2.4, sync that up and build that first since it creates a tarball which we don't want. - - $ source admin-tools/setup-python-2.4.sh - $ git merge master - # Add and fix merge conflicts - $ git commit - -# Check against older versions - - $ admin-tools/check-older-versions.sh - -# Make packages and tag - - $ . ./admin-tools/make-dist-older.sh - $ pyenv local 3.8.5 - $ twine check dist/uncompyle6-$VERSION* - $ git tag release-python-2.4-$VERSION - $ ./admin-tools/make-dist-newer.sh - $ twine check dist/uncompyle6-$VERSION* - -# Check package on github - - $ [[ ! 
-d /tmp/gittest ]] && mkdir /tmp/gittest; pushd /tmp/gittest - $ pyenv local 3.8.3 - $ pip install -e git://github.com/rocky/python-uncompyle6.git#egg=uncompyle6 - $ uncompyle6 --help - $ pip uninstall uncompyle6 - $ popd - -# Release on Github - -Goto https://github.com/rocky/python-uncompyle6/releases - -Now check the *tagged* release. (Checking the untagged release was previously done). - -Todo: turn this into a script in `admin-tools` - - $ pushd /tmp/gittest - $ pip install -e git://github.com/rocky/python-uncompyle6.git@$VERSION#egg=uncompyle6 - $ uncompyle6 --help - $ pip uninstall uncompyle6 - $ popd - - -# Get onto PyPI - - $ twine upload dist/uncompyle6-${VERSION}* - - -# Update tags: - - $ git push --tags - $ git pull --tags - -# Move dist files to uploaded - - $ mv -v dist/uncompyle6-${VERSION}* dist/uploaded diff --git a/admin-tools/make-dist-2.4-2.7.sh b/admin-tools/make-dist-2.4-2.7.sh index 0b1f520a5..eaf0b5cc0 100755 --- a/admin-tools/make-dist-2.4-2.7.sh +++ b/admin-tools/make-dist-2.4-2.7.sh @@ -3,9 +3,9 @@ PACKAGE=uncompyle6 # FIXME put some of the below in a common routine function finish { - cd $owd + cd $make_dist_uncompyle6_owd } -owd=$(pwd) +make_dist_uncompyle6_owd=$(pwd) trap finish EXIT cd $(dirname ${BASH_SOURCE[0]}) @@ -21,23 +21,31 @@ source $PACKAGE/version.py echo $__version__ for pyversion in $PYVERSIONS; do + echo --- $pyversion --- + if [[ ${pyversion:0:4} == "pypy" ]] ; then + echo "$pyversion - PyPy does not get special packaging" + continue + fi if ! pyenv local $pyversion ; then exit $? fi rm -fr build python setup.py bdist_egg + echo === $pyversion === done -pyenv local 2.7.18 -python setup.py bdist_wheel -mv -v dist/${PACKAGE}-$__version__-py2{.py3,}-none-any.whl +pyenv local 2.7 +# python setup.py bdist_wheel +# mv -v dist/${PACKAGE}-$__version__-py2{.py3,}-none-any.whl # Pypi can only have one source tarball. 
# Tarballs can get created from the above setup, so make sure to remove them since we want # the tarball from master. -tarball=dist/${PACKAGE}-${__version_}_-tar.gz +python ./setup.py sdist +tarball=dist/${PACKAGE}-${__version__}.tar.gz if [[ -f $tarball ]]; then - rm -v dist/${PACKAGE}-${__version__}-tar.gz + mv -v $tarball dist/${PACKAGE}_24-${__version__}.tar.gz fi +finish diff --git a/admin-tools/make-dist-3.0-3.2.sh b/admin-tools/make-dist-3.0-3.2.sh new file mode 100644 index 000000000..33ec1cf23 --- /dev/null +++ b/admin-tools/make-dist-3.0-3.2.sh @@ -0,0 +1,49 @@ +#!/bin/bash +PACKAGE=uncompyle6 + +# FIXME put some of the below in a common routine +function finish { + cd $uncompyle6_30_make_dist_owd +} + +cd $(dirname ${BASH_SOURCE[0]}) +uncompyle6_30_make_dist_owd=$(pwd) +trap finish EXIT + +if ! source ./pyenv-3.0-3.2-versions ; then + exit $? +fi +if ! source ./setup-python-3.0.sh ; then + exit $? +fi + +cd .. +source $PACKAGE/version.py +echo $__version__ + +for pyversion in $PYVERSIONS; do + echo --- $pyversion --- + if [[ ${pyversion:0:4} == "pypy" ]] ; then + echo "$pyversion - PyPy does not get special packaging" + continue + fi + if ! pyenv local $pyversion ; then + exit $? + fi + # pip bdist_egg create too-general wheels. So + # we narrow that by moving the generated wheel. + + # Pick out first two number of version, e.g. 3.5.1 -> 35 + first_two=$(echo $pyversion | cut -d'.' 
-f 1-2 | sed -e 's/\.//') + rm -fr build + python setup.py bdist_egg bdist_wheel + mv -v dist/${PACKAGE}-$__version__-{py3,py$first_two}-none-any.whl + echo === $pyversion === +done + +python ./setup.py sdist +tarball=dist/${PACKAGE}-${__version__}.tar.gz +if [[ -f $tarball ]]; then + mv -v $tarball dist/${PACKAGE}_30-${__version__}.tar.gz +fi +finish diff --git a/admin-tools/make-dist-3.3-3.5.sh b/admin-tools/make-dist-3.3-3.5.sh index 95426ffb1..4588a43d2 100755 --- a/admin-tools/make-dist-3.3-3.5.sh +++ b/admin-tools/make-dist-3.3-3.5.sh @@ -3,11 +3,11 @@ PACKAGE=uncompyle6 # FIXME put some of the below in a common routine function finish { - cd $owd + cd $uncompyle6_33_make_owd } cd $(dirname ${BASH_SOURCE[0]}) -owd=$(pwd) +uncompyle6_33_make_owd=$(pwd) trap finish EXIT if ! source ./pyenv-3.3-3.5-versions ; then @@ -22,6 +22,11 @@ source $PACKAGE/version.py echo $__version__ for pyversion in $PYVERSIONS; do + echo --- $pyversion --- + if [[ ${pyversion:0:4} == "pypy" ]] ; then + echo "$pyversion - PyPy does not get special packaging" + continue + fi if ! pyenv local $pyversion ; then exit $? fi @@ -32,7 +37,13 @@ for pyversion in $PYVERSIONS; do first_two=$(echo $pyversion | cut -d'.' 
-f 1-2 | sed -e 's/\.//') rm -fr build python setup.py bdist_egg bdist_wheel - mv -v dist/${PACKAGE}-$__version__-{py2.py3,py$first_two}-none-any.whl + mv -v dist/${PACKAGE}-$__version__-{py3,py$first_two}-none-any.whl + echo === $pyversion === done python ./setup.py sdist +tarball=dist/${PACKAGE}-${__version__}.tar.gz +if [[ -f $tarball ]]; then + mv -v $tarball dist/${PACKAGE}_33-${__version__}.tar.gz +fi +finish diff --git a/admin-tools/make-dist-3.6-3.10.sh b/admin-tools/make-dist-3.6-3.10.sh new file mode 100755 index 000000000..45178debf --- /dev/null +++ b/admin-tools/make-dist-3.6-3.10.sh @@ -0,0 +1,49 @@ +#!/bin/bash +PACKAGE=uncompyle6 + +# FIXME put some of the below in a common routine +function finish { + cd $uncompyle6_36_make_owd +} + +cd $(dirname ${BASH_SOURCE[0]}) +uncompyle6_36_make_owd=$(pwd) +trap finish EXIT + +if ! source ./pyenv-3.6-3.10-versions ; then + exit $? +fi +if ! source ./setup-python-3.6.sh ; then + exit $? +fi + +cd .. +source $PACKAGE/version.py +echo $__version__ + +for pyversion in $PYVERSIONS; do + echo --- $pyversion --- + if [[ ${pyversion:0:4} == "pypy" ]] ; then + echo "$pyversion - PyPy does not get special packaging" + continue + fi + if ! pyenv local $pyversion ; then + exit $? + fi + # pip bdist_egg create too-general wheels. So + # we narrow that by moving the generated wheel. + + # Pick out first two number of version, e.g. 3.5.1 -> 35 + first_two=$(echo $pyversion | cut -d'.' 
-f 1-2 | sed -e 's/\.//') + rm -fr build + python setup.py bdist_egg bdist_wheel + mv -v dist/${PACKAGE}-$__version__-{py3,py$first_two}-none-any.whl + echo === $pyversion === +done + +python ./setup.py sdist +tarball=dist/${PACKAGE}-${__version__}.tar.gz +if [[ -f $tarball ]]; then + mv -v $tarball dist/${PACKAGE}_36-${__version__}.tar.gz +fi +finish diff --git a/admin-tools/make-dist-newest.sh b/admin-tools/make-dist-newest.sh index af04b060b..5893dd897 100755 --- a/admin-tools/make-dist-newest.sh +++ b/admin-tools/make-dist-newest.sh @@ -3,11 +3,13 @@ PACKAGE=uncompyle6 # FIXME put some of the below in a common routine function finish { - cd $owd + if [[ -n "$make_uncompyle6_newest_owd" ]]; then + cd $make_uncompyle6_newest_owd + fi } cd $(dirname ${BASH_SOURCE[0]}) -owd=$(pwd) +make_uncompyle6_newest_owd=$(pwd) trap finish EXIT if ! source ./pyenv-newest-versions ; then @@ -21,18 +23,10 @@ cd .. source $PACKAGE/version.py echo $__version__ -for pyversion in $PYVERSIONS; do - if ! pyenv local $pyversion ; then - exit $? - fi - # pip bdist_egg create too-general wheels. So - # we narrow that by moving the generated wheel. +# Python 3.12 and 3.13 are more restrictive in packaging +pyenv local 3.11 - # Pick out first two number of version, e.g. 3.5.1 -> 35 - first_two=$(echo $pyversion | cut -d'.' -f 1-2 | sed -e 's/\.//') - rm -fr build - python setup.py bdist_egg bdist_wheel - mv -v dist/${PACKAGE}-$__version__-{py2.py3,py$first_two}-none-any.whl -done - -python ./setup.py sdist +rm -fr build +pip wheel --wheel-dir=dist . +python -m build --sdist +finish diff --git a/admin-tools/merge-for-2.4.sh b/admin-tools/merge-for-2.4.sh new file mode 100755 index 000000000..b110e71e9 --- /dev/null +++ b/admin-tools/merge-for-2.4.sh @@ -0,0 +1,7 @@ +#!/bin/bash +uncompyle6_merge_24_owd=$(pwd) +cd $(dirname ${BASH_SOURCE[0]}) +if . 
./setup-python-2.4.sh; then + git merge python-3.0-to-3.2 +fi +cd $uncompyle6_merge_24_owd diff --git a/admin-tools/merge-for-3.0.sh b/admin-tools/merge-for-3.0.sh new file mode 100755 index 000000000..ef1f9100f --- /dev/null +++ b/admin-tools/merge-for-3.0.sh @@ -0,0 +1,7 @@ +#!/bin/bash +uncompyle6_merge_30_owd=$(pwd) +cd $(dirname ${BASH_SOURCE[0]}) +if . ./setup-python-3.0.sh; then + git merge python-3.3-to-3.5 +fi +cd $uncompyle6_merge_30_owd diff --git a/admin-tools/merge-for-3.3.sh b/admin-tools/merge-for-3.3.sh new file mode 100755 index 000000000..8b73afeec --- /dev/null +++ b/admin-tools/merge-for-3.3.sh @@ -0,0 +1,7 @@ +#!/bin/bash +uncompyle6_merge_33_owd=$(pwd) +cd $(dirname ${BASH_SOURCE[0]}) +if . ./setup-python-3.3.sh; then + git merge python-3.6-to-3.10 +fi +cd $uncompyle6_merge_33_owd diff --git a/admin-tools/merge-for-3.6.sh b/admin-tools/merge-for-3.6.sh new file mode 100755 index 000000000..82c743354 --- /dev/null +++ b/admin-tools/merge-for-3.6.sh @@ -0,0 +1,7 @@ +#!/bin/bash +uncompyle6_merge_36_owd=$(pwd) +cd $(dirname ${BASH_SOURCE[0]}) +if . 
./setup-python-3.6.sh; then + git merge master +fi +cd $uncompyle6_merge_36_owd diff --git a/admin-tools/pyenv-3.1-3.2-versions b/admin-tools/pyenv-3.0-3.2-versions similarity index 86% rename from admin-tools/pyenv-3.1-3.2-versions rename to admin-tools/pyenv-3.0-3.2-versions index 334a2631c..df84c5bf8 100644 --- a/admin-tools/pyenv-3.1-3.2-versions +++ b/admin-tools/pyenv-3.0-3.2-versions @@ -6,4 +6,4 @@ if [[ $0 == ${BASH_SOURCE[0]} ]] ; then echo "This script should be *sourced* rather than run directly through bash" exit 1 fi -export PYVERSIONS='3.1.5 3.2.6' +export PYVERSIONS='3.0.1 3.1.5 3.2.6' diff --git a/admin-tools/pyenv-3.6-3.10-versions b/admin-tools/pyenv-3.6-3.10-versions new file mode 100644 index 000000000..aa7eabca6 --- /dev/null +++ b/admin-tools/pyenv-3.6-3.10-versions @@ -0,0 +1,8 @@ +# -*- shell-script -*- +# Sets PYVERSIONS to be pyenv versions that +# we can use in the master branch. +if [[ $0 == ${BASH_SOURCE[0]} ]] ; then + echo "This script should be *sourced* rather than run directly through bash" + exit 1 +fi +export PYVERSIONS='3.6 pypy3.6 pypy3.7 pypy3.810 pyston-2.3.5 3.8 3.9 3.10' diff --git a/admin-tools/pyenv-newest-versions b/admin-tools/pyenv-newest-versions index 509daf6f3..aeac11e36 100644 --- a/admin-tools/pyenv-newest-versions +++ b/admin-tools/pyenv-newest-versions @@ -5,4 +5,4 @@ if [[ $0 == ${BASH_SOURCE[0]} ]] ; then echo "This script should be *sourced* rather than run directly through bash" exit 1 fi -export PYVERSIONS='3.6.15 3.7.13 pypy3.6-7.3.0 pyston-2.3.2 3.8.13 3.9.12 3.10.4' +export PYVERSIONS='3.11 3.12 3.13' diff --git a/admin-tools/pyenv-versions b/admin-tools/pyenv-versions index 9ce703325..7c1bcafa7 100644 --- a/admin-tools/pyenv-versions +++ b/admin-tools/pyenv-versions @@ -5,4 +5,4 @@ if [[ $0 == ${BASH_SOURCE[0]} ]] ; then echo "This script should be *sourced* rather than run directly through bash" exit 1 fi -export PYVERSIONS='3.7.11 3.8.12 3.9.7 3.10.0' +export PYVERSIONS='3.7.13 pyston-2.3.3 3.8.13' 
diff --git a/admin-tools/setup-master.sh b/admin-tools/setup-master.sh index f9564aafc..7c5c55006 100755 --- a/admin-tools/setup-master.sh +++ b/admin-tools/setup-master.sh @@ -1,31 +1,20 @@ #!/bin/bash -PYTHON_VERSION=3.7.13 +# Check out master branch and dependent development master branches +PYTHON_VERSION=3.13 -function checkout_version { - local repo=$1 - version=${2:-master} - echo Checking out $version on $repo ... - (cd ../$repo && git checkout $version && pyenv local $PYTHON_VERSION) && \ - git pull - return $? -} - -# FIXME put some of the below in a common routine -function finish { - cd $owd -} - -export PATH=$HOME/.pyenv/bin/pyenv:$PATH -owd=$(pwd) bs=${BASH_SOURCE[0]} if [[ $0 == $bs ]] ; then echo "This script should be *sourced* rather than run directly through bash" exit 1 fi + +uncompyle6_owd=$(pwd) mydir=$(dirname $bs) fulldir=$(readlink -f $mydir) +cd $mydir +. ./checkout_common.sh cd $fulldir/.. -(cd $fulldir/.. && checkout_version python-spark && checkout_version python-xdis && - checkout_version python-uncompyle6) -cd $owd -rm -v */.python-version >/dev/null 2>&1 || true +(cd $fulldir/.. && \ + setup_version python-spark master && \ + setup_version python-xdis master ) +checkout_finish master diff --git a/admin-tools/setup-python-2.4.sh b/admin-tools/setup-python-2.4.sh index 8d42f1b67..ca48c43d5 100755 --- a/admin-tools/setup-python-2.4.sh +++ b/admin-tools/setup-python-2.4.sh @@ -1,24 +1,23 @@ #!/bin/bash -PYTHON_VERSION=2.4.6 +# Check out python-2.4-to-2.7 and dependent development branches. -function checkout_version { - local repo=$1 - version=${2:-python-2.4} - echo Checking out $version.4 on $repo ... - (cd ../$repo && git checkout $version && pyenv local $PYTHON_VERSION) && \ - git pull - return $? 
-} - -owd=$(pwd) bs=${BASH_SOURCE[0]} if [[ $0 == $bs ]] ; then echo "This script should be *sourced* rather than run directly through bash" exit 1 fi + +PYTHON_VERSION=2.4 + +uncompyle6_owd=$(pwd) mydir=$(dirname $bs) fulldir=$(readlink -f $mydir) -(cd $fulldir/.. && checkout_version python-spark && checkout_version python-xdis python-2.4-to-2.7 && - checkout_version python-uncompyle6) -cd $owd -rm -v */.python-version || true +cd $mydir +. ./checkout_common.sh + +(cd $fulldir/.. && \ + setup_version python-spark python-2.4 && \ + setup_version python-xdis python-2.4) + + +checkout_finish python-2.4-to-2.7 diff --git a/admin-tools/setup-python-3.0.sh b/admin-tools/setup-python-3.0.sh new file mode 100644 index 000000000..f4e81395f --- /dev/null +++ b/admin-tools/setup-python-3.0.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Check out python-3.0-to-3.2 and dependent development branches. +bs=${BASH_SOURCE[0]} +if [[ $0 == $bs ]] ; then + echo "This script should be *sourced* rather than run directly through bash" + exit 1 +fi + +PYTHON_VERSION=3.0 + +uncompyle6_owd=$(pwd) +mydir=$(dirname $bs) +fulldir=$(readlink -f $mydir) +cd $mydir +. ./checkout_common.sh +(cd $fulldir/.. && \ + setup_version python-spark python-3.0 && \ + setup_version python-xdis python-3.0) + +checkout_finish python-3.0-to-3.2 diff --git a/admin-tools/setup-python-3.3.sh b/admin-tools/setup-python-3.3.sh index 4e244df34..18fee07c2 100755 --- a/admin-tools/setup-python-3.3.sh +++ b/admin-tools/setup-python-3.3.sh @@ -1,35 +1,22 @@ #!/bin/bash -PYTHON_VERSION=3.3.7 -pyenv local $PYTHON_VERSION - -# FIXME put some of the below in a common routine -function checkout_version { - local repo=$1 - version=${2:-python-3.3-to-3.5} - echo Checking out $version on $repo ... - (cd ../$repo && git checkout $version && pyenv local $PYTHON_VERSION) && \ - git pull - return $? 
-} - -function finish { - cd $owd -} - -export PATH=$HOME/.pyenv/bin/pyenv:$PATH -owd=$(pwd) +# Check out python-3.3-to-3.5 and dependent development branches. bs=${BASH_SOURCE[0]} if [[ $0 == $bs ]] ; then echo "This script should be *sourced* rather than run directly through bash" exit 1 fi +PYTHON_VERSION=3.3 +pyenv local $PYTHON_VERSION + +uncompyle6_owd=$(pwd) mydir=$(dirname $bs) +cd $mydir fulldir=$(readlink -f $mydir) +. ./checkout_common.sh cd $fulldir/.. -(cd $fulldir/.. && checkout_version python-spark master && checkout_version python-xdis && - checkout_version python-uncompyle6) -cd $owd -rm -v */.python-version || true +(cd $fulldir/.. && \ + setup_version python-spark python-3.3 && \ + setup_version python-xdis python-3.3 ) -git checkout python-3.3-to-3.5 && git pull && pyenv local $PYTHON_VERSION +checkout_finish python-3.3-to-3.5 diff --git a/admin-tools/setup-python-3.6.sh b/admin-tools/setup-python-3.6.sh new file mode 100755 index 000000000..2b0dae68b --- /dev/null +++ b/admin-tools/setup-python-3.6.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# Check out python-3.6-to-3.10 and dependent development branches. + +PYTHON_VERSION=3.6 + +bs=${BASH_SOURCE[0]} +if [[ $0 == $bs ]] ; then + echo "This script should be *sourced* rather than run directly through bash" + exit 1 +fi + +uncompyle6_owd=$(pwd) +mydir=$(dirname $bs) +cd $mydir +fulldir=$(readlink -f $mydir) +. ./checkout_common.sh +cd $fulldir/.. +(cd $fulldir/.. 
&& \ + setup_version python-spark python-3.6 && \ + setup_version python-xdis python-3.6 ) + +checkout_finish python-3.6-to-3.10 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..ea7cec9b0 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,68 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +authors = [ + {name = "Rocky Bernstein", email = "rb@dustyfeet.com"}, +] + +name = "uncompyle6" +description = "Python cross-version byte-code library and disassembler" +dependencies = [ + "click", + "spark-parser >= 1.8.9, < 1.9.2", + "xdis >= 6.2", +] +readme = "README.rst" + +# Newer lingo +license = "GPL-3.0-or-later" + +keywords = ["Python bytecode", "bytecode", "disassembler"] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Programming Language :: Python", + "Topic :: Software Development :: Libraries :: Python Modules", + "Programming Language :: Python :: 2.4", + "Programming Language :: Python :: 2.5", + "Programming Language :: Python :: 2.6", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3.0", + "Programming Language :: Python :: 3.1", + "Programming Language :: Python :: 3.2", + "Programming Language :: Python :: 3.3", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: Implementation :: PyPy", +] +dynamic = ["version"] + +[project.urls] +Homepage = "https://github.com/rocky/python-uncompyle6" +Downloads = "https://github.com/rocky/python-uncompyle6/releases" + 
+[project.optional-dependencies] +dev = [ + "pre-commit", + "pytest", +] + +[project.scripts] +uncompyle6 = "uncompyle6.bin.uncompile:main_bin" +uncompyle6-tokenize = "uncompyle6.bin.pydisassemble:main" + +[tool.setuptools.dynamic] +version = {attr = "uncompyle6.version.__version__"} + +[tool.setuptools.packages.find] +include = ["uncompyle6*"] # Include all subpackages diff --git a/pytest/Makefile b/pytest/Makefile index cb4c0caa6..16ab89a64 100644 --- a/pytest/Makefile +++ b/pytest/Makefile @@ -7,5 +7,5 @@ PYTHON ?= python test check pytest: @PYTHON_VERSION=`$(PYTHON) -V 2>&1 | cut -d ' ' -f 2 | cut -d'.' -f1,2`; \ if [[ $$PYTHON_VERSION > 3.2 ]] || [[ $$PYTHON_VERSION == 2.7 ]] || [[ $$PYTHON_VERSION == 2.6 ]]; then \ - py.test; \ + $(PYTHON) -m pytest .; \ fi diff --git a/pytest/test_disasm.py b/pytest/test_disasm.py index 97ed847c6..27f4ad36b 100644 --- a/pytest/test_disasm.py +++ b/pytest/test_disasm.py @@ -1,7 +1,7 @@ import os.path import pytest -from uncompyle6.disas import disassemble_file +from uncompyle6.code_fns import disassemble_file def get_srcdir(): filename = os.path.normcase(os.path.dirname(__file__)) diff --git a/pytest/test_fjt.py b/pytest/test_fjt.py index 6c91aa4da..9a59124a0 100644 --- a/pytest/test_fjt.py +++ b/pytest/test_fjt.py @@ -27,7 +27,7 @@ def test_if_in_for(): fjt = scan.find_jump_targets(False) ## FIXME: the data below is wrong. - ## we get different results currenty as well. + ## we get different results currently as well. 
## We need to probably fix both the code ## and the test below # assert {15: [3], 69: [66], 63: [18]} == fjt diff --git a/pytest/test_grammar.py b/pytest/test_grammar.py index 75ff86bd6..44d75e53d 100644 --- a/pytest/test_grammar.py +++ b/pytest/test_grammar.py @@ -74,6 +74,11 @@ def check_tokens(tokens, opcode_set): pass pass + if PYTHON_VERSION_TRIPLE >= (3, 7): + expect_lhs.add("set_for") + unused_rhs.add("set_iter") + pass + pass # FIXME if PYTHON_VERSION_TRIPLE < (3, 8): assert expect_lhs == set(lhs) diff --git a/pytest/validate.py b/pytest/validate.py index f1dd01934..4a730fb28 100644 --- a/pytest/validate.py +++ b/pytest/validate.py @@ -67,7 +67,7 @@ def are_instructions_equal(i1, i2): Determine if two instructions are approximately equal, ignoring certain fields which we allow to differ, namely: - * code objects are ignore (should probaby be checked) due to address + * code objects are ignore (should probably be checked) due to address * line numbers :param i1: left instruction to compare diff --git a/requirements.txt b/requirements.txt index 5b1e7b65d..fd98f9ff9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,8 @@ hypothesis==2.0.0 pytest -e . + +Click~=7.0 +xdis>=6.0.4 +configobj~=5.0.6 +setuptools diff --git a/setup-pretoml.py b/setup-pretoml.py new file mode 100644 index 000000000..57f786c54 --- /dev/null +++ b/setup-pretoml.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +import sys + +import setuptools + +"""Setup script for the 'uncompyle6' distribution.""" + +SYS_VERSION = sys.version_info[0:2] +if SYS_VERSION < (3, 6): + mess = "Python Release 3.6 .. 3.12 are supported in this code branch." + if (2, 4) <= SYS_VERSION <= (2, 7): + mess += ( + "\nFor your Python, version %s, use the python-2.4 code/branch." + % sys.version[0:3] + ) + if SYS_VERSION >= (3, 6): + mess += ( + "\nFor your Python, version %s, use the master code/branch." 
+ % sys.version[0:3] + ) + if (3, 0) <= SYS_VERSION < (3, 3): + mess += ( + "\nFor your Python, version %s, use the python-3.0-to-3.2 code/branch." + % sys.version[0:3] + ) + if (3, 3) <= SYS_VERSION < (3, 6): + mess += ( + "\nFor your Python, version %s, use the python-3.3-to-3.5 code/branch." + % sys.version[0:3] + ) + elif SYS_VERSION < (2, 4): + mess += ( + "\nThis package is not supported for Python version %s." % sys.version[0:3] + ) + print(mess) + raise Exception(mess) + +from __pkginfo__ import ( + __version__, + author, + author_email, + classifiers, + entry_points, + install_requires, + license, + long_description, + modname, + py_modules, + short_desc, + web, + zip_safe, +) + +setuptools.setup( + author=author, + author_email=author_email, + classifiers=classifiers, + description=short_desc, + entry_points=entry_points, + install_requires=install_requires, + license=license, + long_description=long_description, + long_description_content_type="text/x-rst", + name=modname, + packages=setuptools.find_packages(), + py_modules=py_modules, + test_suite="nose.collector", + url=web, + version=__version__, + zip_safe=zip_safe, +) diff --git a/setup.cfg b/setup.cfg index 7231ec473..bfef9f4a7 100755 --- a/setup.cfg +++ b/setup.cfg @@ -12,10 +12,11 @@ doc_files = README.rst # examples/ [bdist_wheel] -universal=1 +universal = no [metadata] description_file = README.rst +license_files = COPYING [flake8] # max-line-length setting: NO we do not want everyone writing 120-character lines! diff --git a/setup.py b/setup.py old mode 100755 new mode 100644 index c88e73628..e81bbe7a1 --- a/setup.py +++ b/setup.py @@ -1,62 +1,28 @@ -#!/usr/bin/env python +""" + Check that the Python version running this is compatible with this installation medium. + Note: that we use 2.x compatible Python code here. 
+""" import sys +from setuptools import setup -"""Setup script for the 'uncompyle6' distribution.""" +major = sys.version_info[0] +minor = sys.version_info[1] -SYS_VERSION = sys.version_info[0:2] -if not ((2, 4) <= SYS_VERSION < (3, 11)): - mess = "Python Release 2.6 .. 3.10 are supported in this code branch." - if (2, 4) <= SYS_VERSION <= (2, 7): - mess += ( - "\nFor your Python, version %s, use the python-2.4 code/branch." - % sys.version[0:3] - ) - if (3, 3) <= SYS_VERSION < (3, 6): - mess += ( - "\nFor your Python, version %s, use the python-3.3-to-3.5 code/branch." - % sys.version[0:3] - ) - elif SYS_VERSION < (2, 4): - mess += ( - "\nThis package is not supported for Python version %s." % sys.version[0:3] - ) - print(mess) - raise Exception(mess) +if major != 3 or not minor >= 11: + sys.stderr.write("This installation medium is only for Python 3.11 and later. You are running Python %s.%s.\n" % (major, minor)) -from __pkginfo__ import ( - author, - author_email, - install_requires, - license, - long_description, - classifiers, - entry_points, - modname, - py_modules, - short_desc, - __version__, - web, - zip_safe, -) +if major == 3 and 6 <= minor <= 10: + sys.stderr.write("Please install using uncompyle6_36-x.y.z.tar.gz from https://github.com/rocky/python-uncompyle6/releases\n") + sys.exit(1) +elif major == 3 and 3 <= minor <= 5: + sys.stderr.write("Please install using uncompyle6_33-x.y.z.tar.gz from https://github.com/rocky/python-uncompyle6/releases\n") + sys.exit(1) +if major == 3 and 0 <= minor <= 2: + sys.stderr.write("Please install using uncompyle6_30-x.y.z.tar.gz from https://github.com/rocky/python-uncompyle6/releases\n") + sys.exit(1) +elif major == 2: + sys.stderr.write("Please install using uncompyle6_24-x.y.z.tar.gz from https://github.com/rocky/python-uncompyle6/releases\n") + sys.exit(1) -from setuptools import setup, find_packages -setup( - author=author, - author_email=author_email, - classifiers=classifiers, - description=short_desc, - 
entry_points=entry_points, - install_requires=install_requires, - license=license, - long_description=long_description, - long_description_content_type="text/x-rst", - name=modname, - packages=find_packages(), - py_modules=py_modules, - test_suite="nose.collector", - url=web, - tests_require=["nose>=1.0"], - version=__version__, - zip_safe=zip_safe, -) +setup() diff --git a/test/Makefile b/test/Makefile index c6c69e571..4e941f83b 100644 --- a/test/Makefile +++ b/test/Makefile @@ -80,11 +80,9 @@ check-3.8: check-bytecode $(PYTHON) test_pythonlib.py --bytecode-3.8-run --verify-run $(PYTHON) test_pythonlib.py --bytecode-3.8 --syntax-verify $(COMPILE) -check-3.9: check-bytecode - @echo "Note that we do not support decompiling Python 3.9 bytecode - no 3.9 tests run" +check-3.9 check-3.10 check-3.11 check-3.12 check-3.13: check-bytecode + @echo "Note that we do not support decompiling this version's bytecode - no 3.9 tests run" -check-3.10: check-bytecode - @echo "Note that we do not support decompiling Python 3.10 bytecode - no 3.10 tests run" # FIXME #: this is called when running under pypy3.5-5.8.0, pypy2-5.6.0, pypy3.6-7.3.0 or pypy3.8-7.3.7 @@ -115,7 +113,7 @@ check-bytecode-2: # FIXME: Until we shaked out problems with xdis... 
check-bytecode-3: $(PYTHON) test_pythonlib.py \ - --bytecode-3.4 --bytecode-3.5 --bytecode-3.6 \ + --bytecode-3.3 --bytecode-3.4 --bytecode-3.5 --bytecode-3.6 \ --bytecode-3.7 --bytecode-3.8 #: Check deparsing on selected bytecode 3.x @@ -317,6 +315,10 @@ check-bytecode-3.8: $(PYTHON) test_pythonlib.py --bytecode-3.8-run --verify-run $(PYTHON) test_pythonlib.py --bytecode-3.8 --syntax-verify +#: Check deparsing Python 3.8 +check-bytecode-3.9 check-bytecode-3.10 check-bytecode-3.11 check-bytecode-3.12 check-bytecode-3.13: + @echo "We don't support decompiling this bytecode for this version" + #: short tests for bytecodes only for this version of Python check-native-short: $(PYTHON) test_pythonlib.py --bytecode-$(PYTHON_VERSION) --syntax-verify $(COMPILE) diff --git a/test/bytecode_1.0/posixpath.pyc b/test/bytecode_1.0/posixpath.pyc new file mode 100644 index 000000000..e5961de6f Binary files /dev/null and b/test/bytecode_1.0/posixpath.pyc differ diff --git a/test/bytecode_1.0/stat.pyc b/test/bytecode_1.0/stat.pyc new file mode 100644 index 000000000..53dcf0ee3 Binary files /dev/null and b/test/bytecode_1.0/stat.pyc differ diff --git a/test/bytecode_2.4/07_try_except.pyc b/test/bytecode_2.4/07_try_except.pyc new file mode 100644 index 000000000..7c6a6566c Binary files /dev/null and b/test/bytecode_2.4/07_try_except.pyc differ diff --git a/test/bytecode_2.5/01_rel_import.pyc b/test/bytecode_2.5/01_rel_import.pyc new file mode 100644 index 000000000..eff913781 Binary files /dev/null and b/test/bytecode_2.5/01_rel_import.pyc differ diff --git a/test/bytecode_2.5/06_if_and_bugs.pyc b/test/bytecode_2.5/06_if_and_bugs.pyc new file mode 100644 index 000000000..f3fb33e46 Binary files /dev/null and b/test/bytecode_2.5/06_if_and_bugs.pyc differ diff --git a/test/bytecode_2.5/07_try_except.pyc b/test/bytecode_2.5/07_try_except.pyc new file mode 100644 index 000000000..f5634516a Binary files /dev/null and b/test/bytecode_2.5/07_try_except.pyc differ diff --git 
a/test/bytecode_2.5_run/05_long_literals24.pyc b/test/bytecode_2.5_run/05_long_literals24.pyc new file mode 100644 index 000000000..ffae445b0 Binary files /dev/null and b/test/bytecode_2.5_run/05_long_literals24.pyc differ diff --git a/test/bytecode_2.6/01_rel_import.pyc b/test/bytecode_2.6/01_rel_import.pyc new file mode 100644 index 000000000..d399eadf3 Binary files /dev/null and b/test/bytecode_2.6/01_rel_import.pyc differ diff --git a/test/bytecode_2.7/01_rel_import.pyc b/test/bytecode_2.7/01_rel_import.pyc index 46c146daa..8e516b37d 100644 Binary files a/test/bytecode_2.7/01_rel_import.pyc and b/test/bytecode_2.7/01_rel_import.pyc differ diff --git a/test/bytecode_2.7/06_nop.pyc b/test/bytecode_2.7/06_nop.pyc new file mode 100644 index 000000000..0f6f966d1 Binary files /dev/null and b/test/bytecode_2.7/06_nop.pyc differ diff --git a/test/bytecode_2.7/16_bytestring_docstring.pyc b/test/bytecode_2.7/16_bytestring_docstring.pyc new file mode 100644 index 000000000..0d4114e12 Binary files /dev/null and b/test/bytecode_2.7/16_bytestring_docstring.pyc differ diff --git a/test/bytecode_2.7_run/03_comprehension_in_lambda.pyc b/test/bytecode_2.7_run/03_comprehension_in_lambda.pyc new file mode 100644 index 000000000..e7b3586a3 Binary files /dev/null and b/test/bytecode_2.7_run/03_comprehension_in_lambda.pyc differ diff --git a/test/bytecode_2.7_run/05_long_literals.pyc b/test/bytecode_2.7_run/05_long_literals.pyc new file mode 100644 index 000000000..21d4540e5 Binary files /dev/null and b/test/bytecode_2.7_run/05_long_literals.pyc differ diff --git a/test/bytecode_2.7_run/07_for_if_else-continue.pyc-notyet b/test/bytecode_2.7_run/07_for_if_else-continue.pyc-notyet new file mode 100644 index 000000000..616fa031d Binary files /dev/null and b/test/bytecode_2.7_run/07_for_if_else-continue.pyc-notyet differ diff --git a/test/bytecode_3.1_run/03_comprehension_in_lambda.pyc b/test/bytecode_3.1_run/03_comprehension_in_lambda.pyc new file mode 100644 index 000000000..983f747db 
Binary files /dev/null and b/test/bytecode_3.1_run/03_comprehension_in_lambda.pyc differ diff --git a/test/bytecode_3.3/03_ifelse_in_lambda.pyc b/test/bytecode_3.3/03_ifelse_in_lambda.pyc new file mode 100644 index 000000000..7628206f9 Binary files /dev/null and b/test/bytecode_3.3/03_ifelse_in_lambda.pyc differ diff --git a/test/bytecode_3.3_run/02_make_closure.pyc b/test/bytecode_3.3_run/02_make_closure.pyc new file mode 100644 index 000000000..0828c6cb8 Binary files /dev/null and b/test/bytecode_3.3_run/02_make_closure.pyc differ diff --git a/test/bytecode_3.4/03_ifelse_in_lambda.pyc b/test/bytecode_3.4/03_ifelse_in_lambda.pyc new file mode 100644 index 000000000..01df985d0 Binary files /dev/null and b/test/bytecode_3.4/03_ifelse_in_lambda.pyc differ diff --git a/test/bytecode_3.4_run/02_make_closure.pyc b/test/bytecode_3.4_run/02_make_closure.pyc new file mode 100644 index 000000000..5cfb04469 Binary files /dev/null and b/test/bytecode_3.4_run/02_make_closure.pyc differ diff --git a/test/bytecode_3.5/02_for_else_bug.pyc b/test/bytecode_3.5/02_for_else_bug.pyc new file mode 100644 index 000000000..ace423162 Binary files /dev/null and b/test/bytecode_3.5/02_for_else_bug.pyc differ diff --git a/test/bytecode_3.5/03_ifelse_in_lambda.pyc b/test/bytecode_3.5/03_ifelse_in_lambda.pyc new file mode 100644 index 000000000..33b8757f6 Binary files /dev/null and b/test/bytecode_3.5/03_ifelse_in_lambda.pyc differ diff --git a/test/bytecode_3.6/03_async_from_coroutine.pyc b/test/bytecode_3.6/03_async_from_coroutine.pyc new file mode 100644 index 000000000..da634c61a Binary files /dev/null and b/test/bytecode_3.6/03_async_from_coroutine.pyc differ diff --git a/test/bytecode_3.6/06_nop.pyc b/test/bytecode_3.6/06_nop.pyc new file mode 100644 index 000000000..f0420f2b4 Binary files /dev/null and b/test/bytecode_3.6/06_nop.pyc differ diff --git a/test/bytecode_3.6/09_long_whilestmt.pyc b/test/bytecode_3.6/09_long_whilestmt.pyc new file mode 100644 index 000000000..7612eb31e Binary 
files /dev/null and b/test/bytecode_3.6/09_long_whilestmt.pyc differ diff --git a/test/bytecode_3.6_run/05_long_literals.pyc b/test/bytecode_3.6_run/05_long_literals.pyc index e73e3a0f4..201a1c68b 100644 Binary files a/test/bytecode_3.6_run/05_long_literals.pyc and b/test/bytecode_3.6_run/05_long_literals.pyc differ diff --git a/test/bytecode_3.6_run/08_test_contextmanager.pyc b/test/bytecode_3.6_run/08_test_contextmanager.pyc new file mode 100644 index 000000000..999815b0d Binary files /dev/null and b/test/bytecode_3.6_run/08_test_contextmanager.pyc differ diff --git a/test/bytecode_3.6_run/10_fstring.pyc b/test/bytecode_3.6_run/10_fstring.pyc index b54bc4915..c3ae23255 100644 Binary files a/test/bytecode_3.6_run/10_fstring.pyc and b/test/bytecode_3.6_run/10_fstring.pyc differ diff --git a/test/bytecode_3.7/00_while_true_pass.pyc b/test/bytecode_3.7/00_while_true_pass.pyc new file mode 100644 index 000000000..d03bde5bd Binary files /dev/null and b/test/bytecode_3.7/00_while_true_pass.pyc differ diff --git a/test/bytecode_3.7/02_while1_if_while1.pyc b/test/bytecode_3.7/02_while1_if_while1.pyc new file mode 100644 index 000000000..1e8cf9176 Binary files /dev/null and b/test/bytecode_3.7/02_while1_if_while1.pyc differ diff --git a/test/bytecode_3.7/03_async_from_coroutine.pyc b/test/bytecode_3.7/03_async_from_coroutine.pyc new file mode 100644 index 000000000..7aad71ede Binary files /dev/null and b/test/bytecode_3.7/03_async_from_coroutine.pyc differ diff --git a/test/bytecode_3.7_run/03_comprehension_in_lambda.pyc b/test/bytecode_3.7_run/03_comprehension_in_lambda.pyc new file mode 100644 index 000000000..7a80bf5cc Binary files /dev/null and b/test/bytecode_3.7_run/03_comprehension_in_lambda.pyc differ diff --git a/test/bytecode_3.7_run/05_long_literals.pyc b/test/bytecode_3.7_run/05_long_literals.pyc index 466bb751c..f0ab39de1 100644 Binary files a/test/bytecode_3.7_run/05_long_literals.pyc and b/test/bytecode_3.7_run/05_long_literals.pyc differ diff --git 
a/test/bytecode_3.7_run/10_extendedargifelse.pyc b/test/bytecode_3.7_run/10_extendedargifelse.pyc new file mode 100644 index 000000000..3dda1b31e Binary files /dev/null and b/test/bytecode_3.7_run/10_extendedargifelse.pyc differ diff --git a/test/bytecode_3.8/00_while_true_pass.pyc b/test/bytecode_3.8/00_while_true_pass.pyc new file mode 100644 index 000000000..2bc34752e Binary files /dev/null and b/test/bytecode_3.8/00_while_true_pass.pyc differ diff --git a/test/bytecode_3.8/02_tryfinally_return.pyc b/test/bytecode_3.8/02_tryfinally_return.pyc index 9b946df39..c46b7c6ea 100644 Binary files a/test/bytecode_3.8/02_tryfinally_return.pyc and b/test/bytecode_3.8/02_tryfinally_return.pyc differ diff --git a/test/bytecode_3.8/03_async_from_coroutine.pyc b/test/bytecode_3.8/03_async_from_coroutine.pyc new file mode 100644 index 000000000..50c0dde1c Binary files /dev/null and b/test/bytecode_3.8/03_async_from_coroutine.pyc differ diff --git a/test/bytecode_3.8/03_while_bug.pyc b/test/bytecode_3.8/03_while_bug.pyc new file mode 100644 index 000000000..20a2d6d1c Binary files /dev/null and b/test/bytecode_3.8/03_while_bug.pyc differ diff --git a/test/bytecode_3.8/16_no_bytestring_docstring.pyc b/test/bytecode_3.8/16_no_bytestring_docstring.pyc new file mode 100644 index 000000000..6d41643df Binary files /dev/null and b/test/bytecode_3.8/16_no_bytestring_docstring.pyc differ diff --git a/test/bytecode_3.8_run/00_bug_dict_comp.pyc b/test/bytecode_3.8_run/00_bug_dict_comp.pyc new file mode 100644 index 000000000..8fd6638c7 Binary files /dev/null and b/test/bytecode_3.8_run/00_bug_dict_comp.pyc differ diff --git a/test/bytecode_3.8_run/02_fstring_debug.pyc b/test/bytecode_3.8_run/02_fstring_debug.pyc new file mode 100644 index 000000000..a62b99f36 Binary files /dev/null and b/test/bytecode_3.8_run/02_fstring_debug.pyc differ diff --git a/test/bytecode_3.8_run/03_comprehension_in_lambda.pyc b/test/bytecode_3.8_run/03_comprehension_in_lambda.pyc new file mode 100644 index 
000000000..76233c0ea Binary files /dev/null and b/test/bytecode_3.8_run/03_comprehension_in_lambda.pyc differ diff --git a/test/bytecode_3.8_run/05_long_literals.pyc b/test/bytecode_3.8_run/05_long_literals.pyc index d599a738c..38c0ed30f 100644 Binary files a/test/bytecode_3.8_run/05_long_literals.pyc and b/test/bytecode_3.8_run/05_long_literals.pyc differ diff --git a/test/bytecode_3.8_run/08_test_contextmanager.pyc b/test/bytecode_3.8_run/08_test_contextmanager.pyc new file mode 100644 index 000000000..f610c7e9c Binary files /dev/null and b/test/bytecode_3.8_run/08_test_contextmanager.pyc differ diff --git a/test/decompyle/test_prettyprint.py b/test/decompyle/test_prettyprint.py index 957d72eb7..634041a6c 100644 --- a/test/decompyle/test_prettyprint.py +++ b/test/decompyle/test_prettyprint.py @@ -1,6 +1,6 @@ """ test_prettyprint.py -- source test pattern for tesing the prettyprint - funcionality of decompyle + functionality of decompyle This source is part of the decompyle test suite. 
diff --git a/test/ok_lib2.7/bsddb/dbshelve.py b/test/ok_lib2.7/bsddb/dbshelve.py index 7d0daa2f2..60469b6ab 100644 --- a/test/ok_lib2.7/bsddb/dbshelve.py +++ b/test/ok_lib2.7/bsddb/dbshelve.py @@ -29,6 +29,7 @@ #------------------------------------------------------------------------ import sys + absolute_import = (sys.version_info[0] >= 3) if absolute_import : # Because this syntaxis is not valid before Python 2.5 @@ -229,7 +230,7 @@ def append(self, value, txn=None): def associate(self, secondaryDB, callback, flags=0): def _shelf_callback(priKey, priData, realCallback=callback): - # Safe in Python 2.x because expresion short circuit + # Safe in Python 2.x because expression short circuit if sys.version_info[0] < 3 or isinstance(priData, bytes) : data = cPickle.loads(priData) else : @@ -366,7 +367,7 @@ def _extract(self, rec): return None else: key, data = rec - # Safe in Python 2.x because expresion short circuit + # Safe in Python 2.x because expression short circuit if sys.version_info[0] < 3 or isinstance(data, bytes) : return key, cPickle.loads(data) else : diff --git a/test/simple_source/bug14/test_builtin.py b/test/simple_source/bug14/test_builtin.py new file mode 100644 index 000000000..81f8abdab --- /dev/null +++ b/test/simple_source/bug14/test_builtin.py @@ -0,0 +1,8 @@ +from test_support import * +print '4. 
Built-in functions' +print 'test_b1' +unload('test_b1') +import test_b1 +print 'test_b2' +unload('test_b2') +import test_b2 diff --git a/test/simple_source/bug25/06_if_and_bugs.py b/test/simple_source/bug25/06_if_and_bugs.py new file mode 100644 index 000000000..0dbd1636b --- /dev/null +++ b/test/simple_source/bug25/06_if_and_bugs.py @@ -0,0 +1,18 @@ +# 2.5 Bug is from nose/plugins/cover.py +def wantFile(self, file, package=None): + if self.coverInclusive: + if file.endswith(".py"): + if package and self.coverPackages: + for want in self.coverPackages: + if package.startswith(want): + return True + else: + return True + return None + + +# 2.5 bug is from nose/plugins/doctests.py +def wantFile2(self, file): + if self and (self.conf or [exc.search(file) for exc in self.conf]): + return True + return None diff --git a/test/simple_source/bug26/.gitignore b/test/simple_source/bug26/.gitignore new file mode 100644 index 000000000..183700bc1 --- /dev/null +++ b/test/simple_source/bug26/.gitignore @@ -0,0 +1 @@ +/.python-version diff --git a/test/simple_source/bug26/03_weird26.py b/test/simple_source/bug26/03_weird26.py index 9b583eed0..b0c97df19 100644 --- a/test/simple_source/bug26/03_weird26.py +++ b/test/simple_source/bug26/03_weird26.py @@ -3,10 +3,10 @@ # Grammar allows multiple adjacent 'if's in listcomps and genexps, # even though it's silly. Make sure it works (ifelse broke this.) -[ x for x in range(10) if x % 2 if x % 3 ] +[x for x in range(10) if x % 2 if x % 3] list(x for x in range(10) if x % 2 if x % 3) -# expresion which evaluates True unconditionally, +# expression which evaluates True unconditionally, # but leave dead code or junk around that we have to match on. 
# Tests "if_exp_true" rule 5 if 1 else 2 diff --git a/test/simple_source/bug26/07_try_except.py b/test/simple_source/bug26/07_try_except.py new file mode 100644 index 000000000..50fbe4e89 --- /dev/null +++ b/test/simple_source/bug26/07_try_except.py @@ -0,0 +1,34 @@ +# Bug portion of Issue #405 https://github.com/rocky/python-uncompyle6/issues/405 +# Bug was detecting if/else as the last item in a "try: .. except" block. +class Saveframe(object): + """A saveframe. Use the classmethod from_scratch to create one.""" + + frame_list = {} + + def frame_dict(self): + return + + # Next line is 1477 + def __setitem__(self, key, item): + # Next line is 1481 + if isinstance(item, Saveframe): + try: + self.frame_list[key] = item + except TypeError: + if key in (self.frame_dict()): + dict((frame.name, frame) for frame in self.frame_list) + for pos, frame in enumerate(self.frame_list): + if frame.name == key: + self.frame_list[pos] = item + else: + raise KeyError( + "Saveframe with name '%s' does not exist and " + "therefore cannot be written to. Use the add_saveframe method to add new saveframes." + % key + ) + # Next line is 1498 + raise ValueError("You can only assign an entry to a saveframe splice.") + + +x = Saveframe() +x.__setitem__("foo", 5) diff --git a/test/simple_source/bug27+/01_argument_quoting.py b/test/simple_source/bug27+/01_argument_quoting.py new file mode 100644 index 000000000..3defbb600 --- /dev/null +++ b/test/simple_source/bug27+/01_argument_quoting.py @@ -0,0 +1,7 @@ +# Bug was erroneously putting quotes around Exception on decompilatoin +# RUNNABLE! 
+ +"""This program is self-checking!""" +z = ["y", Exception] +assert z[0] == "y" +assert isinstance(z[1], Exception) diff --git a/test/simple_source/bug27+/01_module_doc.py b/test/simple_source/bug27+/01_module_doc.py index 0ceef372e..d2e3282d6 100644 --- a/test/simple_source/bug27+/01_module_doc.py +++ b/test/simple_source/bug27+/01_module_doc.py @@ -1,8 +1,8 @@ # From 2.7.17 test_bdb.py -# The problem was detecting a docstring at the begining of the module +# The problem was detecting a docstring at the beginning of the module # It must be detected and change'd or else the "from __future__" below # is invalid. -# Note that this has to be compiled with optimation < 2 or else optimization +# Note that this has to be compiled with optimization < 2 or else optimization # will remove the docstring """Rational, infinite-precision, real numbers.""" diff --git a/test/simple_source/bug27+/03_comprehension_in_lambda.py b/test/simple_source/bug27+/03_comprehension_in_lambda.py new file mode 100644 index 000000000..6928575dd --- /dev/null +++ b/test/simple_source/bug27+/03_comprehension_in_lambda.py @@ -0,0 +1,11 @@ +# RUNNABLE! +# From issue 469 + +"""This program is self-checking!""" + +my_dict = (lambda variable0: {variable1: 123 for variable1 in variable0})([1, 2, 3]) + +assert my_dict[1] == 123 + +my_set = (lambda variable0: {variable1 for variable1 in variable0})([1, 2, 3]) +assert 2 in my_set diff --git a/test/simple_source/bug27+/04_try_tryelse.py b/test/simple_source/bug27+/04_try_tryelse.py index 366f3c6ff..9ab39042d 100644 --- a/test/simple_source/bug27+/04_try_tryelse.py +++ b/test/simple_source/bug27+/04_try_tryelse.py @@ -1,5 +1,5 @@ # From 2.7 test_normalize.py -# Bug has to to with finding the end of the tryelse block. I think thrown +# Bug has to do with finding the end of the tryelse block. I think thrown # off by the "continue". In instructions the COME_FROM for END_FINALLY # was at the wrong offset because some sort of "rtarget" was adjust. 
diff --git a/test/simple_source/bug27+/07_for_if_else-continue.py b/test/simple_source/bug27+/07_for_if_else-continue.py new file mode 100644 index 000000000..d1e421fee --- /dev/null +++ b/test/simple_source/bug27+/07_for_if_else-continue.py @@ -0,0 +1,41 @@ +# Issue #413 on 2.7 +# Bug in handling CONTINUE in else block of if-then-else in a for loop +# Bug was "if" and "else" jump back to loop getting detected. +# RUNNABLE! + +"""This program is self-checking!""" +def test1(a, r = []): + for b in a: + if b: + r.append(3) + else: + r.append(5) + continue + if r == []: + pass + return r + +def test2(a, r = None): + for b in a: + if b: + #pass # No payload + continue + raise AssertionError("CONTINUE not followed") + else: + continue + raise AssertionError("CONTINUE not followed") + if b: + r = b + raise AssertionError("CONTINUE not followed") + return r + +assert test1([], []) == [], "For loop not taken" +assert test1([False], []) == [5], "if 'else' should have been taken" +assert test1([True], []) == [3], "if 'then' should have been taken" +assert test1([True, True], []) == [3, 3], "if should have been taken" +assert test1([True, False], []) == [3, 5], "if and then 'else' should have been taken" +assert test1([False, True], []) == [5, 3], "if else and then 'then' should have been taken" +assert test1([False, False], []) == [5, 5], "if else should have been taken twice" +assert test1([True, True], []) == [3, 3], "if 'then' should have been taken twice" +assert test2([True]) is None, "Incorrect flow" +assert test2([False]) is None, "Incorrect flow" diff --git a/test/simple_source/bug30/01_ops.py b/test/simple_source/bug30/01_ops.py index c984b3dd2..6536b180a 100644 --- a/test/simple_source/bug30/01_ops.py +++ b/test/simple_source/bug30/01_ops.py @@ -1,20 +1,20 @@ # Statements to beef up grammar coverage rules # Force "inplace" ops # Note this is like simple_source/bug22/01_ops.py -# But we don't ahve the UNARY_CONVERT which dropped +# But we don't have the 
UNARY_CONVERT which dropped # out around 2.7 y = +10 # UNARY_POSITIVE -y /= 1 # INPLACE_DIVIDE -y %= 4 # INPLACE_MODULO +y /= 1 # INPLACE_DIVIDE +y %= 4 # INPLACE_MODULO y **= 1 # INPLACE POWER y >>= 2 # INPLACE_RSHIFT y <<= 2 # INPLACE_LSHIFT y //= 1 # INPLACE_TRUE_DIVIDE -y &= 1 # INPLACE_AND -y ^= 1 # INPLACE_XOR +y &= 1 # INPLACE_AND +y ^= 1 # INPLACE_XOR # Beef up aug_assign and STORE_SLICE+3 -x = [1,2,3,4,5] +x = [1, 2, 3, 4, 5] x[0:1] = 1 x[0:3] += 1, 2, 3 diff --git a/test/simple_source/bug33/01_triple_compare.py b/test/simple_source/bug33/01_triple_compare.py index dc28d36c1..cdbc18ab0 100644 --- a/test/simple_source/bug33/01_triple_compare.py +++ b/test/simple_source/bug33/01_triple_compare.py @@ -1,7 +1,7 @@ # In Python 3.3+ this uses grammar rule -# compare_chained2 ::= expr COMPARE_OP RETURN_VALUE +# compare_chained_right ::= expr COMPARE_OP RETURN_VALUE # In Python 3.6 uses this uses grammar rule -# compare_chained2 ::= expr COMPARE_OP come_froms JUMP_FORWARD +# compare_chained_right ::= expr COMPARE_OP come_froms JUMP_FORWARD # Seen in Python 3.3 ipaddress.py diff --git a/test/simple_source/bug33/04_lambda_star_default.py b/test/simple_source/bug33/04_lambda_star_default.py index 879b8960a..a1e2db671 100644 --- a/test/simple_source/bug33/04_lambda_star_default.py +++ b/test/simple_source/bug33/04_lambda_star_default.py @@ -1,18 +1,20 @@ # From 3.x test_audiop.py # Bug is handling default value after * argument in a lambda. -# That's a mouthful of desciption; I am not sure if the really +# That's a mouthful of description; I am not sure if the really # hacky fix to the code is even correct. # # FIXME: try and test with more than one default argument. 
+ # RUNNABLE def pack(width, data): return (width, data) + packs = {w: (lambda *data, width=w: pack(width, data)) for w in (1, 2, 4)} -assert packs[1]('a') == (1, ('a',)) -assert packs[2]('b') == (2, ('b',)) -assert packs[4]('c') == (4, ('c',)) +assert packs[1]("a") == (1, ("a",)) +assert packs[2]("b") == (2, ("b",)) +assert packs[4]("c") == (4, ("c",)) diff --git a/test/simple_source/bug33/08_if_else.py b/test/simple_source/bug33/08_if_else.py index abb104dea..d16da5b86 100644 --- a/test/simple_source/bug33/08_if_else.py +++ b/test/simple_source/bug33/08_if_else.py @@ -1,16 +1,19 @@ # From python 3.3.7 trace # Bug was not having not having semantic rule for conditional not + # RUNNABLE! def init(modules=None): mods = set() if not modules else set(modules) return mods + assert init() == set() assert init([1, 2, 3]) == set([1, 2, 3]) + # From 3.6 sre_parse -# Bug was in handling multple COME_FROMS from nested if's +# Bug was in handling multiple COME_FROMS from nested if's def _escape(a, b, c, d, e): if a: if b: @@ -24,15 +27,16 @@ def _escape(a, b, c, d, e): return raise -assert _escape(False, True, True, True, True) is None -assert _escape(True, True, True, False, True) is None -assert _escape(True, True, False, False, True) is None + +assert _escape(False, True, True, True, True) is None +assert _escape(True, True, True, False, True) is None +assert _escape(True, True, False, False, True) is None for args in ( - (True, True, True, False, True), - (True, False, True, True, True), - (True, False, True, True, False), - ): + (True, True, True, False, True), + (True, False, True, True, True), + (True, False, True, True, False), +): try: _escape(*args) assert False, args diff --git a/test/simple_source/bug34/02_make_closure.py b/test/simple_source/bug34/02_make_closure.py new file mode 100644 index 000000000..86f8c2253 --- /dev/null +++ b/test/simple_source/bug34/02_make_closure.py @@ -0,0 +1,18 @@ +# Related to #426 + +# This file is RUNNABLE! 
+"""This program is self-checking!""" + +a = 5 +class MakeClosureTest(): + # This function uses MAKE_CLOSURE with annotation args + def __init__(self, dev: str, b: bool): + super().__init__() + self.dev = dev + self.b = b + self.a = a + +x = MakeClosureTest("dev", True) +assert x.dev == "dev" +assert x.b == True +assert x.a == 5 diff --git a/test/simple_source/bug34/03_ifelse_in_lambda.py b/test/simple_source/bug34/03_ifelse_in_lambda.py new file mode 100644 index 000000000..b498a0091 --- /dev/null +++ b/test/simple_source/bug34/03_ifelse_in_lambda.py @@ -0,0 +1,4 @@ +# Next line is 1164 +def foo(): + name = "bar" + lambda x: compile(x, "" % name, "exec") if x else None diff --git a/test/simple_source/bug35/02_for_else_bug.py b/test/simple_source/bug35/02_for_else_bug.py new file mode 100644 index 000000000..c8f85ad8c --- /dev/null +++ b/test/simple_source/bug35/02_for_else_bug.py @@ -0,0 +1,10 @@ +# Adapted 3.5 from _bootstrap_external.py + + +def spec_from_file_location(loader, location): + if loader: + for _ in __file__: + if location: + break + else: + return None diff --git a/test/simple_source/bug35/06_while_return.py b/test/simple_source/bug35/06_while_return.py index 735065f7c..a08949de6 100644 --- a/test/simple_source/bug35/06_while_return.py +++ b/test/simple_source/bug35/06_while_return.py @@ -1,8 +1,9 @@ # From Python 3.4 asynchat.py -# Tests presence or absense of +# Tests presence or absence of # SETUP_LOOP testexpr return_stmts POP_BLOCK COME_FROM_LOOP # Note: that there is no JUMP_BACK because of the return_stmts. 
+ def initiate_send(a, b, c, num_sent): while a and b: try: @@ -24,6 +25,7 @@ def initiate_send2(a, b): return 2 + assert initiate_send(1, 1, 2, False) == 1 assert initiate_send(1, 2, 3, False) == 3 assert initiate_send(1, 2, 3, True) == 2 diff --git a/test/simple_source/bug36/02_kwargs.py b/test/simple_source/bug36/02_kwargs.py index 5b5af9e96..bb3b6ca3d 100644 --- a/test/simple_source/bug36/02_kwargs.py +++ b/test/simple_source/bug36/02_kwargs.py @@ -5,7 +5,7 @@ def bug(self, j, a, b): self.parse_comment(a, b, report=3) # From 3.6 fnmatch.py -# Bug was precidence parenthesis around decorator +# Bug was precedence parenthesis around decorator import functools @functools.lru_cache(maxsize=256, typed=True) diff --git a/test/simple_source/bug36/03_async_from_coroutine.py b/test/simple_source/bug36/03_async_from_coroutine.py new file mode 100644 index 000000000..30d3fcd1a --- /dev/null +++ b/test/simple_source/bug36/03_async_from_coroutine.py @@ -0,0 +1,48 @@ +# These are from 3.6 test_coroutines.py +async def run_gen(f): + return (10 async for i in f) + +async def run_list(f): + return [i async for i in f()] + +# async def run_dict(): +# return {i + 1 async for i in [10, 20]} + +async def iterate(gen): + res = [] + async for i in gen: + res.append(i) + return res + +def test_comp_5(f): + # async def f(it): + # for i in it: + # yield i + + async def run_list(): + return [i + for + pair in + ([10, 20]) + async for i + in f + ] + +async def test2(x, buffer, f): + with x: + async for i in f: + if i: + break + else: + buffer() + buffer() + +async def test3(x, buffer, f): + with x: + async for i in f: + if i: + continue + buffer() + else: + buffer.append() + buffer() diff --git a/test/simple_source/bug36/03_fn_defaults.py b/test/simple_source/bug36/03_fn_defaults.py index 6167ce0cc..af27ee25c 100644 --- a/test/simple_source/bug36/03_fn_defaults.py +++ b/test/simple_source/bug36/03_fn_defaults.py @@ -1,13 +1,20 @@ -# Python 3.6 changes, yet again, the way deafult pairs are 
handled +# Python 3.6 changes, yet again, the way default pairs are handled def foo1(bar, baz=1): return 1 + + def foo2(bar, baz, qux=1): return 2 + + def foo3(bar, baz=1, qux=2): return 3 + + def foo4(bar, baz, qux=1, quux=2): return 4 + # From 3.6 compileall. # Bug was in omitting default which when used in an "if" # are treated as False would be diff --git a/test/simple_source/bug36/04_class_kwargs.py b/test/simple_source/bug36/04_class_kwargs.py index 55797a276..36072659a 100644 --- a/test/simple_source/bug36/04_class_kwargs.py +++ b/test/simple_source/bug36/04_class_kwargs.py @@ -1,17 +1,23 @@ # From 3.6 test_abc.py -# Bug was Reciever() class definition +# Bug was Receiver() class definition import abc import unittest + + class TestABCWithInitSubclass(unittest.TestCase): def test_works_with_init_subclass(self): class ReceivesClassKwargs: def __init_subclass__(cls, **kwargs): super().__init_subclass__() + class Receiver(ReceivesClassKwargs, abc.ABC, x=1, y=2, z=3): pass + def test_abstractmethod_integration(self): for abstractthing in [abc.abstractmethod]: + class C(metaclass=abc.ABCMeta): @abstractthing - def foo(self): pass # abstract + def foo(self): + pass # abstract diff --git a/test/simple_source/bug36/05_if_and_comp.py b/test/simple_source/bug36/05_if_and_comp.py index 9c4bc61d9..b29594518 100644 --- a/test/simple_source/bug36/05_if_and_comp.py +++ b/test/simple_source/bug36/05_if_and_comp.py @@ -1,12 +1,12 @@ # From 3.6 base64.py -# Bug was handling "and" condition in the presense of POP_JUMP_IF_FALSE +# Bug was handling "and" condition in the presence of POP_JUMP_IF_FALSE # locations def _85encode(foldnuls, words): - return ['z' if foldnuls and word - else 'y' - for word in words] + return ["z" if foldnuls and word else "y" for word in words] + # From Python 3.6 enum.py + def __new__(metacls, cls, bases, classdict): {k: classdict[k] for k in classdict._member_names} diff --git a/test/simple_source/bug36/09_long_whilestmt.py 
b/test/simple_source/bug36/09_long_whilestmt.py new file mode 100644 index 000000000..1cf98b6b8 --- /dev/null +++ b/test/simple_source/bug36/09_long_whilestmt.py @@ -0,0 +1,74 @@ +# From https://github.com/rocky/python-uncompyle6/issues/420 +# Related to EXTENDED_ARG in whilestmt +ERRPR_CODE_DEFINE = {} # Remove this and things works + +try: + print() +except Exception: + var1 = 0 + var2 = 1 + if var1 or var2: + times = 1 + while times != False and self.scanner.is_open(): + try: + try: + print() + except Exception: + print() + + out = 0 + count = 1 + if out == 1: + break + elif out == 2: + count += 1 + if times == 3: + self.func.emit({}) + break + else: + continue + if out == 3 or out == b"": + if self.times == 3: + break + count += 1 + if count == 3: + count = 0 + if out == 4: + self.func.emit(ERRPR_CODE_DEFINE.ReceiedError()) + else: + print() + break + continue + else: + count = 0 + except Exception: + print("upper exception") + else: + try: + print("jump forward") + while True: + out = self.func.read(count) + if out == b"": + self.func.emit(ERRPR_CODE_DEFINE.ReceiedError()) + break + continue + imagedata = out[0] + if imagedata == b"\x05": + self.func.emit(INFORMATION.UnsupportedImage()) + break + continue + if imagedata == b"\x15": + self.func.emit(INFORMATION.NoneImage()) + break + continue + if out[1] == False: + start_index = imagedata.find(b"BM6") + self.func.emit(imagedata[start_index:], False) + continue + (imagedata, all_code) = imagedata + self.func.emit({}) + self.func.emit({}) + self.func.emit({}) # remove {} and this works + break + except Exception: + pass diff --git a/test/simple_source/bug36/10_fstring.py b/test/simple_source/bug36/10_fstring.py index ef38dabf6..9a93db7ec 100644 --- a/test/simple_source/bug36/10_fstring.py +++ b/test/simple_source/bug36/10_fstring.py @@ -14,6 +14,13 @@ assert "def0" == f"{abc}0" assert "defdef" == f"{abc}{abc!s}" + +# From 3.8 test/test_string.py +# We had the precedence of yield vs. lambda incorrect. 
+def fn(x): + yield f"x:{yield (lambda i: x * i)}" + + # From 3.6 functools.py # Bug was handling format operator strings. @@ -53,9 +60,9 @@ assert "05$" == f"{log_rounds:02d}$" -def testit(a, b, l): - # print(l) - return l +def testit(a, b, ll): + # print(ll) + return ll # The call below shows the need for BUILD_STRING to count expr arguments. @@ -91,16 +98,17 @@ def _repr_fn(fields): (x, y, width) = ("foo", 2, 10) assert f"x={x*y:{width}}" == "x=foofoo " + # Why the fact that the distinction of docstring versus stmt is a # string expression is important academic, but we will decompile an -# equivalent thing. For compatiblity with older Python we'll use "%" +# equivalent thing. For compatibility with older Python we'll use "%" # instead of a format string def f(): - f"""Not a docstring""" + f"""Not a docstring""" # noqa def g(): - """Not a docstring""" f"" + """Not a docstring""" f"" # noqa assert f.__doc__ is None @@ -123,3 +131,17 @@ def g(): # The former, {{ confuses the format strings so dictionary/set comprehensions # don't work. 
assert f"expr={ {x: y for x, y in [(1, 2), ]}}" == "expr={1: 2}" + + +class Line: + def __init__(self, x, y): + self.x = x + self.y = y + + # From 3.7 test_typing.py + def __str__(self): + return f"{self.x} -> {self.y}" + + +line = Line(1, 2) +assert str(line) == "1 -> 2" diff --git a/test/simple_source/bug36/10_long_pop_jump.py b/test/simple_source/bug36/10_long_pop_jump.py index ad6c4de7f..da6a1981b 100644 --- a/test/simple_source/bug36/10_long_pop_jump.py +++ b/test/simple_source/bug36/10_long_pop_jump.py @@ -1,26 +1,27 @@ # From 3.6 _markupbase.py -# Bug is that the routine is long enough that POP_JUMP_IF_FALSE instruciton has an -# EXTENDED_ARG intruction before it and we weren't picking out the jump offset properly +# Bug is that the routine is long enough that POP_JUMP_IF_FALSE instruction has an +# EXTENDED_ARG instruction before it and we weren't picking out the jump offset properly + def parse_declaration(self, i): if rawdata[j:j] in ("-", ""): return -1 n = len(rawdata) - if rawdata[j:j+2] == '-': + if rawdata[j : j + 2] == "-": return self.parse_comment(i) - elif rawdata[j] == '[': + elif rawdata[j] == "[": return self.parse_marked_section(i) else: decltype, j = self._scan_name(j, i) if j < 0: return j if decltype == "d": - self._decl_otherchars = '' + self._decl_otherchars = "" while j < n: c = rawdata[j] if c == ">": - data = rawdata[i+2:j] + data = rawdata[i + 2 : j] if decltype == "d": self.handle_decl(data) else: @@ -43,8 +44,7 @@ def parse_declaration(self, i): else: self.error("unexpected '[' char in declaration") else: - self.error( - "unexpected %r char in declaration" % rawdata[j]) + self.error("unexpected %r char in declaration" % rawdata[j]) if j < 0: return j return -1 diff --git a/test/simple_source/bug37/10_extendedargifelse.py b/test/simple_source/bug37/10_extendedargifelse.py new file mode 100644 index 000000000..d8c95091b --- /dev/null +++ b/test/simple_source/bug37/10_extendedargifelse.py @@ -0,0 +1,272 @@ +# This is RUNNABLE! 
+ +"""This program is self-checking!""" + +# Bug was handling if which has EXTENDED_ARG +# See https://github.com/rocky/python-uncompyle6/pull/406 + +aa = 0 +ab = 0 +ac = 0 +ad = 0 +ae = 0 +af = 0 +ag = 0 +ah = 0 +ai = 0 +aj = 0 +ak = 0 +al = 0 +am = 0 +an = 0 +ao = 0 +ap = 0 +aq = 0 +ar = 0 +at = 0 +au = 0 +av = 0 +aw = 0 +ax = 0 +ay = 0 +az = 0 +ba = 0 +bb = 0 +bc = 0 +bd = 0 +be = 0 +bf = 0 +bg = 0 +bh = 0 +bi = 0 +bj = 0 +bk = 0 +bl = 0 +bm = 0 +bn = 0 +bo = 0 +bp = 0 +bq = 0 +br = 0 +bs = 0 +bt = 0 +bu = 0 +bv = 0 +bw = 0 +bx = 0 +by = 0 +bz = 0 +ca = 0 +cb = 0 +cc = 0 +cd = 0 +ce = 0 +cf = 0 +cg = 0 +ch = 0 +ci = 0 +cj = 0 +ck = 0 +cl = 0 +cm = 0 +cn = 0 +co = 0 +cp = 0 +cq = 0 +cr = 0 +cs = 0 +ct = 0 +cu = 0 +cv = 0 +cw = 0 +cx = 0 +cy = 0 +cz = 0 +da = 0 +db = 0 +dc = 0 +dd = 0 +de = 0 +df = 0 +dg = 0 +dh = 0 +di = 0 +dj = 0 +dk = 0 +dl = 0 +dm = 0 +dn = 0 +do = 0 +dp = 0 +dq = 0 +dr = 0 +ds = 0 +dt = 0 +du = 0 +dv = 0 +dw = 0 +dx = 0 +dy = 0 +dz = 0 +ea = 0 +eb = 0 +ec = 0 +ed = 0 +ee = 0 +ef = 0 +eg = 0 +eh = 0 +ei = 0 +ej = 0 +ek = 0 +el = 0 +em = 0 +en = 0 +eo = 0 +ep = 0 +eq = 0 +er = 0 +es = 0 +et = 0 +eu = 0 +ev = 0 +ew = 0 +ex = 0 +ey = 0 +ez = 0 +fa = 0 +fb = 0 +fc = 0 +fd = 0 +fe = 0 +ff = 0 +fg = 0 +fh = 0 +fi = 0 +fj = 0 +fk = 0 +fl = 0 +fm = 0 +fn = 0 +fo = 0 +fp = 0 +fq = 0 +fr = 0 +fs = 0 +ft = 0 +fu = 0 +fv = 0 +fw = 0 +fx = 0 +fy = 0 +fz = 0 +ga = 0 +gb = 0 +gc = 0 +gd = 0 +ge = 0 +gf = 0 +gg = 0 +gh = 0 +gi = 0 +gj = 0 +gk = 0 +gl = 0 +gm = 0 +gn = 0 +go = 0 +gp = 0 +gq = 0 +gr = 0 +gs = 0 +gt = 0 +gu = 0 +gv = 0 +gw = 0 +gx = 0 +gy = 0 +gz = 0 +ha = 0 +hb = 0 +hc = 0 +hd = 0 +he = 0 +hf = 0 +hg = 0 +hh = 0 +hi = 0 +hj = 0 +hk = 0 +hl = 0 +hm = 0 +hn = 0 +ho = 0 +hp = 0 +hq = 0 +hr = 0 +hs = 0 +ht = 0 +hu = 0 +hv = 0 +hw = 0 +hx = 0 +hy = 0 +hz = 0 +ia = 0 +ib = 0 +ic = 0 +id = 0 +ie = 0 +ig = 0 +ih = 0 +ii = 0 +ij = 0 +ik = 0 +il = 0 +im = 0 +io = 0 +ip = 0 +iq = 0 +ir = 0 +it = 0 +iu = 0 +iv = 0 +iw = 0 +ix = 0 +iy = 0 +iz = 0 +ja = 0 
+jb = 0 +jc = 0 +jd = 0 +je = 0 +jf = 0 +jg = 0 +jh = 0 +ji = 0 +jj = 0 +jk = 0 +jl = 0 +jm = 0 +jn = 0 +jo = 0 +jp = 0 +jq = 0 +jr = 0 +js = 0 +jt = 0 +ju = 0 +jv = 0 +jw = 0 +jx = 0 +jy = 0 +jz = 0 +ka = 0 +kb = 0 +kc = 0 +var = True +if var: + aa = 1 +else: + aa = 2 +assert aa == 1 diff --git a/test/simple_source/bug38/00_bug_dict_comp.py b/test/simple_source/bug38/00_bug_dict_comp.py new file mode 100644 index 000000000..c9b69db28 --- /dev/null +++ b/test/simple_source/bug38/00_bug_dict_comp.py @@ -0,0 +1,12 @@ +# Issue 104 +# Python 3.8 reverses the order or keys and values in +# dictionary comprehensions from the order in all previous Pythons. +# Also we were looking in the wrong place for the collection of the +# dictionary comprehension +# RUNNABLE! + +"""This program is self-checking!""" +x = [(0, [1]), (2, [3])] +for i in range(0, 1): + y = {key: val[i - 1] for (key, val) in x} +assert y == {0: 1, 2: 3} diff --git a/test/simple_source/bug38/00_while_true_pass.py b/test/simple_source/bug38/00_while_true_pass.py new file mode 100644 index 000000000..cdc01f0d3 --- /dev/null +++ b/test/simple_source/bug38/00_while_true_pass.py @@ -0,0 +1,6 @@ +# The 3.8 bugs were in detecting +# 1) while True: pass +# 2) confusing the "if" ending in a loop jump with a "while" +if __name__: + while True: + pass diff --git a/test/simple_source/bug38/02_fstring_debug.py b/test/simple_source/bug38/02_fstring_debug.py new file mode 100644 index 000000000..e0bd5cedf --- /dev/null +++ b/test/simple_source/bug38/02_fstring_debug.py @@ -0,0 +1,32 @@ +# Tests new "debug" format new in 3.8. +# Much of this is adapted from 3.8 test/test_fstring.py +# RUNNABLE! 
+ +"""This program is self-checking!""" + +# fmt: off +# We want to use "=" and ":=" *without* the surrounding space to test format spec and "=" detection +f'{f"{3.1415=:.1f}":*^20}' == '*****3.1415=3.1*****' + +y = 2 +def f(x, width): + return f'x={x*y:{width}}' + +assert f('foo', 10) == 'x=foofoo ' + +x = 'bar' +assert f(10, 10), 'x= 20' + +x = 'A string' +f"x={x!r}" == 'x=' + repr(x) + +pi = 'π' +assert f'alpha α {pi=} ω omega', "alpha α pi='π' ω omega" + +x = 20 +# This isn't an assignment expression, it's 'x', with a format +# spec of '=10'. +assert f'{x:=10}' == ' 20' + +assert f'{(x:=10)}' == '10' +assert x == 10 diff --git a/test/simple_source/bug38/03_while_bug.py b/test/simple_source/bug38/03_while_bug.py new file mode 100644 index 000000000..4ed198b6b --- /dev/null +++ b/test/simple_source/bug38/03_while_bug.py @@ -0,0 +1,9 @@ +# See https://github.com/rocky/python-uncompyle6/issues/498 +# Bug was in not allowing _stmts in whilestmt38 +import time + +r = 0 +while r == 1: + print(time.time()) + if r == 1: + r = 0 diff --git a/test/simple_source/calls/01_positional.py b/test/simple_source/calls/01_positional.py index 3cc17e25b..9527bf899 100644 --- a/test/simple_source/calls/01_positional.py +++ b/test/simple_source/calls/01_positional.py @@ -1,5 +1,5 @@ # Tests custom added grammar rule: # expr ::= expr {expr}^n CALL_FUNCTION_n -# which in the specifc case below is: +# which in the specific case below is: # expr ::= expr expr expr CALL_FUNCTION_2 max(1, 2) diff --git a/test/simple_source/expression/05_long_literals.py b/test/simple_source/expression/05_long_literals.py index 9d0a0bf0d..f03d44937 100644 --- a/test/simple_source/expression/05_long_literals.py +++ b/test/simple_source/expression/05_long_literals.py @@ -161,7 +161,9 @@ "return": 12, } -assert tuple(x.keys()) == ("b", "c", "e", "g", "h", "j", "k", "return") +# We need sorted here and below, because x.keys() in 2.7 comes out in the reverse order. +# Go figure. 
+assert sorted(x.keys()) == ["b", "c", "e", "g", "h", "j", "k", "return"] # Ensure that in dictionary we produce integers, not strings x = {1: 2, 3: 4} @@ -675,7 +677,12 @@ "value502": 502 + 1, } -assert list(values.values())[1:] == list(range(3, 502 + 2)) +import sys +if sys.version < (3, 0): + # Python 2.7 is funky with values.values() ordering + assert sorted(values.values())[1:-2] == list(range(4, 502 + 1)) +else: + assert list(values.values())[1:] == list(range(3, 502 + 2)) # Try a long dictionary that fails because we have a binary op. # We can get a expr32 grouping speedup @@ -717,4 +724,14 @@ "value33": 33, } -assert list(values.values())[1:] == list(range(2, 34)) +assert sorted(values.values())[1:] == list(range(2, 34)) + +def assert_equal(x, y): + assert x == y + +# Check that we can distinguish names from strings in literal collections, e.g. lists. +# The list has to have more than 4 items to get accumulated in a collection +a = ["y", 'Exception', "x", Exception, "z"] + +assert_equal(a[1], "Exception") +assert_equal(a[3], Exception) diff --git a/test/simple_source/expression/05_long_literals24.py b/test/simple_source/expression/05_long_literals24.py new file mode 100644 index 000000000..f62ba2225 --- /dev/null +++ b/test/simple_source/expression/05_long_literals24.py @@ -0,0 +1,721 @@ +# Long lists pose a slowdown in uncompiling. +"This program is self-checking!" + +# Try an empty list to check that long-matching detection doesn't mess that up. +# In theory this should work even though we put cap on short lists which +# is checked below. +x = [] +assert len(x) == 0 and isinstance(x, list) + +# Try an short list to check that long-matching detection doesn't mess that up. +# This is a more general situation of the above. +x = [1, 1, 1] + +# Until we have better "and" rules (which we have +# around, but not in decompyle3 or uncompyle6 yet) +# avoid 3-term "and"s +assert len(x) == 3 +assert isinstance(x, list) and all(x) + +# fmt: off +# Try a long list. 
This should not be slow +# as it has been in the past. +x = [ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +] + +assert all(x) +assert len(x) == 300 and isinstance(x, list) + +# Python before 2.7 doesn't have sets literal +# # Try a long set. This should not be slow +# # as it has been in the past. 
+# x = { +# 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +# 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +# 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +# 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +# 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +# 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +# 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +# 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +# 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +# } + +# assert x == {1} and isinstance(x, set) + +# Try using variables rather than constants +a = 1 +# First, a list +x = [ + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, a, a, +] + +assert all(x) +assert len(x) == 300 and isinstance(x, list) + +# Next, a set + +# x = { +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, 
a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# a, a, a, a, a, a, a, a, a, a, +# } + +# assert x == {1} and isinstance(x, set) + +# Check some dictionary keys. +# Ensure that in dictionary we produce quoted strings +x = { + "b": 1, + "c": 2, + "e": 3, + "g": 6, + "h": 7, + "j": 9, + "k": 11, + "return": 12, +} + +assert sorted(x.keys()) == ["b", "c", "e", "g", "h", "j", "k", "return"] + +# Ensure that in dictionary we produce integers, not strings +x = {1: 2, 3: 4} + +assert tuple(x.keys()) == (1, 3) + +# Try a long dictionary. +# This should not be slow as it has been in the past +values = { + "value1": x, # Note this is LOAD_NAME + "value2": 2 + 1, # Constant should be folded into "LOAD_CONST" + "value3": 3 + 1, + "value4": 4 + 1, + "value5": 5 + 1, + "value6": 6 + 1, + "value7": 7 + 1, + "value8": 8 + 1, + "value9": 9 + 1, + "value10": 10 + 1, + "value11": 11 + 1, + "value12": 12 + 1, + "value13": 13 + 1, + "value14": 14 + 1, + "value15": 15 + 1, + "value16": 16 + 1, + "value17": 17 + 1, + "value18": 18 + 1, + "value19": 19 + 1, + "value20": 20 + 1, + "value21": 21 + 1, + "value22": 22 + 1, + "value23": 23 + 1, + "value24": 24 + 1, + "value25": 25 + 1, + "value26": 26 + 1, + "value27": 27 + 1, + "value28": 28 + 1, + "value29": 29 + 1, + "value30": 30 + 1, + "value31": 31 + 1, + "value32": 32 + 1, + "value33": 33 + 1, + "value34": 34 + 1, + "value35": 35 + 1, + "value36": 36 + 1, + "value37": 37 + 1, + "value38": 38 + 1, + "value39": 39 + 1, + "value40": 40 + 1, + "value41": 41 + 1, + "value42": 42 + 1, + "value43": 43 + 1, + "value44": 44 + 1, + "value45": 45 + 1, + 
"value46": 46 + 1, + "value47": 47 + 1, + "value48": 48 + 1, + "value49": 49 + 1, + "value50": 50 + 1, + "value51": 51 + 1, + "value52": 52 + 1, + "value53": 53 + 1, + "value54": 54 + 1, + "value55": 55 + 1, + "value56": 56 + 1, + "value57": 57 + 1, + "value58": 58 + 1, + "value59": 59 + 1, + "value60": 60 + 1, + "value61": 61 + 1, + "value62": 62 + 1, + "value63": 63 + 1, + "value64": 64 + 1, + "value65": 65 + 1, + "value66": 66 + 1, + "value67": 67 + 1, + "value68": 68 + 1, + "value69": 69 + 1, + "value70": 70 + 1, + "value71": 71 + 1, + "value72": 72 + 1, + "value73": 73 + 1, + "value74": 74 + 1, + "value75": 75 + 1, + "value76": 76 + 1, + "value77": 77 + 1, + "value78": 78 + 1, + "value79": 79 + 1, + "value80": 80 + 1, + "value81": 81 + 1, + "value82": 82 + 1, + "value83": 83 + 1, + "value84": 84 + 1, + "value85": 85 + 1, + "value86": 86 + 1, + "value87": 87 + 1, + "value88": 88 + 1, + "value89": 89 + 1, + "value90": 90 + 1, + "value91": 91 + 1, + "value92": 92 + 1, + "value93": 93 + 1, + "value94": 94 + 1, + "value95": 95 + 1, + "value96": 96 + 1, + "value97": 97 + 1, + "value98": 98 + 1, + "value99": 99 + 1, + "value100": 100 + 1, + "value101": 101 + 1, + "value102": 102 + 1, + "value103": 103 + 1, + "value104": 104 + 1, + "value105": 105 + 1, + "value106": 106 + 1, + "value107": 107 + 1, + "value108": 108 + 1, + "value109": 109 + 1, + "value110": 110 + 1, + "value111": 111 + 1, + "value112": 112 + 1, + "value113": 113 + 1, + "value114": 114 + 1, + "value115": 115 + 1, + "value116": 116 + 1, + "value117": 117 + 1, + "value118": 118 + 1, + "value119": 119 + 1, + "value120": 120 + 1, + "value121": 121 + 1, + "value122": 122 + 1, + "value123": 123 + 1, + "value124": 124 + 1, + "value125": 125 + 1, + "value126": 126 + 1, + "value127": 127 + 1, + "value128": 128 + 1, + "value129": 129 + 1, + "value130": 130 + 1, + "value131": 131 + 1, + "value132": 132 + 1, + "value133": 133 + 1, + "value134": 134 + 1, + "value135": 135 + 1, + "value136": 136 + 1, + "value137": 
137 + 1, + "value138": 138 + 1, + "value139": 139 + 1, + "value140": 140 + 1, + "value141": 141 + 1, + "value142": 142 + 1, + "value143": 143 + 1, + "value144": 144 + 1, + "value145": 145 + 1, + "value146": 146 + 1, + "value147": 147 + 1, + "value148": 148 + 1, + "value149": 149 + 1, + "value150": 150 + 1, + "value151": 151 + 1, + "value152": 152 + 1, + "value153": 153 + 1, + "value154": 154 + 1, + "value155": 155 + 1, + "value156": 156 + 1, + "value157": 157 + 1, + "value158": 158 + 1, + "value159": 159 + 1, + "value160": 160 + 1, + "value161": 161 + 1, + "value162": 162 + 1, + "value163": 163 + 1, + "value164": 164 + 1, + "value165": 165 + 1, + "value166": 166 + 1, + "value167": 167 + 1, + "value168": 168 + 1, + "value169": 169 + 1, + "value170": 170 + 1, + "value171": 171 + 1, + "value172": 172 + 1, + "value173": 173 + 1, + "value174": 174 + 1, + "value175": 175 + 1, + "value176": 176 + 1, + "value177": 177 + 1, + "value178": 178 + 1, + "value179": 179 + 1, + "value180": 180 + 1, + "value181": 181 + 1, + "value182": 182 + 1, + "value183": 183 + 1, + "value184": 184 + 1, + "value185": 185 + 1, + "value186": 186 + 1, + "value187": 187 + 1, + "value188": 188 + 1, + "value189": 189 + 1, + "value190": 190 + 1, + "value191": 191 + 1, + "value192": 192 + 1, + "value193": 193 + 1, + "value194": 194 + 1, + "value195": 195 + 1, + "value196": 196 + 1, + "value197": 197 + 1, + "value198": 198 + 1, + "value199": 199 + 1, + "value200": 200 + 1, + "value201": 201 + 1, + "value202": 202 + 1, + "value203": 203 + 1, + "value204": 204 + 1, + "value205": 205 + 1, + "value206": 206 + 1, + "value207": 207 + 1, + "value208": 208 + 1, + "value209": 209 + 1, + "value210": 210 + 1, + "value211": 211 + 1, + "value212": 212 + 1, + "value213": 213 + 1, + "value214": 214 + 1, + "value215": 215 + 1, + "value216": 216 + 1, + "value217": 217 + 1, + "value218": 218 + 1, + "value219": 219 + 1, + "value220": 220 + 1, + "value221": 221 + 1, + "value222": 222 + 1, + "value223": 223 + 1, + 
"value224": 224 + 1, + "value225": 225 + 1, + "value226": 226 + 1, + "value227": 227 + 1, + "value228": 228 + 1, + "value229": 229 + 1, + "value230": 230 + 1, + "value231": 231 + 1, + "value232": 232 + 1, + "value233": 233 + 1, + "value234": 234 + 1, + "value235": 235 + 1, + "value236": 236 + 1, + "value237": 237 + 1, + "value238": 238 + 1, + "value239": 239 + 1, + "value240": 240 + 1, + "value241": 241 + 1, + "value242": 242 + 1, + "value243": 243 + 1, + "value244": 244 + 1, + "value245": 245 + 1, + "value246": 246 + 1, + "value247": 247 + 1, + "value248": 248 + 1, + "value249": 249 + 1, + "value250": 250 + 1, + "value251": 251 + 1, + "value252": 252 + 1, + "value253": 253 + 1, + "value254": 254 + 1, + "value255": 255 + 1, + "value256": 256 + 1, + "value257": 257 + 1, + "value258": 258 + 1, + "value259": 259 + 1, + "value260": 260 + 1, + "value261": 261 + 1, + "value262": 262 + 1, + "value263": 263 + 1, + "value264": 264 + 1, + "value265": 265 + 1, + "value266": 266 + 1, + "value267": 267 + 1, + "value268": 268 + 1, + "value269": 269 + 1, + "value270": 270 + 1, + "value271": 271 + 1, + "value272": 272 + 1, + "value273": 273 + 1, + "value274": 274 + 1, + "value275": 275 + 1, + "value276": 276 + 1, + "value277": 277 + 1, + "value278": 278 + 1, + "value279": 279 + 1, + "value280": 280 + 1, + "value281": 281 + 1, + "value282": 282 + 1, + "value283": 283 + 1, + "value284": 284 + 1, + "value285": 285 + 1, + "value286": 286 + 1, + "value287": 287 + 1, + "value288": 288 + 1, + "value289": 289 + 1, + "value290": 290 + 1, + "value291": 291 + 1, + "value292": 292 + 1, + "value293": 293 + 1, + "value294": 294 + 1, + "value295": 295 + 1, + "value296": 296 + 1, + "value297": 297 + 1, + "value298": 298 + 1, + "value299": 299 + 1, + "value300": 300 + 1, + "value301": 301 + 1, + "value302": 302 + 1, + "value303": 303 + 1, + "value304": 304 + 1, + "value305": 305 + 1, + "value306": 306 + 1, + "value307": 307 + 1, + "value308": 308 + 1, + "value309": 309 + 1, + "value310": 310 + 1, 
+ "value311": 311 + 1, + "value312": 312 + 1, + "value313": 313 + 1, + "value314": 314 + 1, + "value315": 315 + 1, + "value316": 316 + 1, + "value317": 317 + 1, + "value318": 318 + 1, + "value319": 319 + 1, + "value320": 320 + 1, + "value321": 321 + 1, + "value322": 322 + 1, + "value323": 323 + 1, + "value324": 324 + 1, + "value325": 325 + 1, + "value326": 326 + 1, + "value327": 327 + 1, + "value328": 328 + 1, + "value329": 329 + 1, + "value330": 330 + 1, + "value331": 331 + 1, + "value332": 332 + 1, + "value333": 333 + 1, + "value334": 334 + 1, + "value335": 335 + 1, + "value336": 336 + 1, + "value337": 337 + 1, + "value338": 338 + 1, + "value339": 339 + 1, + "value340": 340 + 1, + "value341": 341 + 1, + "value342": 342 + 1, + "value343": 343 + 1, + "value344": 344 + 1, + "value345": 345 + 1, + "value346": 346 + 1, + "value347": 347 + 1, + "value348": 348 + 1, + "value349": 349 + 1, + "value350": 350 + 1, + "value351": 351 + 1, + "value352": 352 + 1, + "value353": 353 + 1, + "value354": 354 + 1, + "value355": 355 + 1, + "value356": 356 + 1, + "value357": 357 + 1, + "value358": 358 + 1, + "value359": 359 + 1, + "value360": 360 + 1, + "value361": 361 + 1, + "value362": 362 + 1, + "value363": 363 + 1, + "value364": 364 + 1, + "value365": 365 + 1, + "value366": 366 + 1, + "value367": 367 + 1, + "value368": 368 + 1, + "value369": 369 + 1, + "value370": 370 + 1, + "value371": 371 + 1, + "value372": 372 + 1, + "value373": 373 + 1, + "value374": 374 + 1, + "value375": 375 + 1, + "value376": 376 + 1, + "value377": 377 + 1, + "value378": 378 + 1, + "value379": 379 + 1, + "value380": 380 + 1, + "value381": 381 + 1, + "value382": 382 + 1, + "value383": 383 + 1, + "value384": 384 + 1, + "value385": 385 + 1, + "value386": 386 + 1, + "value387": 387 + 1, + "value388": 388 + 1, + "value389": 389 + 1, + "value390": 390 + 1, + "value391": 391 + 1, + "value392": 392 + 1, + "value393": 393 + 1, + "value394": 394 + 1, + "value395": 395 + 1, + "value396": 396 + 1, + "value397": 397 + 
1, + "value398": 398 + 1, + "value399": 399 + 1, + "value400": 400 + 1, + "value401": 401 + 1, + "value402": 402 + 1, + "value403": 403 + 1, + "value404": 404 + 1, + "value405": 405 + 1, + "value406": 406 + 1, + "value407": 407 + 1, + "value408": 408 + 1, + "value409": 409 + 1, + "value410": 410 + 1, + "value411": 411 + 1, + "value412": 412 + 1, + "value413": 413 + 1, + "value414": 414 + 1, + "value415": 415 + 1, + "value416": 416 + 1, + "value417": 417 + 1, + "value418": 418 + 1, + "value419": 419 + 1, + "value420": 420 + 1, + "value421": 421 + 1, + "value422": 422 + 1, + "value423": 423 + 1, + "value424": 424 + 1, + "value425": 425 + 1, + "value426": 426 + 1, + "value427": 427 + 1, + "value428": 428 + 1, + "value429": 429 + 1, + "value430": 430 + 1, + "value431": 431 + 1, + "value432": 432 + 1, + "value433": 433 + 1, + "value434": 434 + 1, + "value435": 435 + 1, + "value436": 436 + 1, + "value437": 437 + 1, + "value438": 438 + 1, + "value439": 439 + 1, + "value440": 440 + 1, + "value441": 441 + 1, + "value442": 442 + 1, + "value443": 443 + 1, + "value444": 444 + 1, + "value445": 445 + 1, + "value446": 446 + 1, + "value447": 447 + 1, + "value448": 448 + 1, + "value449": 449 + 1, + "value450": 450 + 1, + "value451": 451 + 1, + "value452": 452 + 1, + "value453": 453 + 1, + "value454": 454 + 1, + "value455": 455 + 1, + "value456": 456 + 1, + "value457": 457 + 1, + "value458": 458 + 1, + "value459": 459 + 1, + "value460": 460 + 1, + "value461": 461 + 1, + "value462": 462 + 1, + "value463": 463 + 1, + "value464": 464 + 1, + "value465": 465 + 1, + "value466": 466 + 1, + "value467": 467 + 1, + "value468": 468 + 1, + "value469": 469 + 1, + "value470": 470 + 1, + "value471": 471 + 1, + "value472": 472 + 1, + "value473": 473 + 1, + "value474": 474 + 1, + "value475": 475 + 1, + "value476": 476 + 1, + "value477": 477 + 1, + "value478": 478 + 1, + "value479": 479 + 1, + "value480": 480 + 1, + "value481": 481 + 1, + "value482": 482 + 1, + "value483": 483 + 1, + "value484": 484 
+ 1, + "value485": 485 + 1, + "value486": 486 + 1, + "value487": 487 + 1, + "value488": 488 + 1, + "value489": 489 + 1, + "value490": 490 + 1, + "value491": 491 + 1, + "value492": 492 + 1, + "value493": 493 + 1, + "value494": 494 + 1, + "value495": 495 + 1, + "value496": 496 + 1, + "value497": 497 + 1, + "value498": 498 + 1, + "value499": 499 + 1, + "value500": 500 + 1, + "value501": 501 + 1, + "value502": 502 + 1, +} + +assert len(values.values()) == 502 + +# Try a long dictionary that fails because we have a binary op. +# We can get a expr32 grouping speedup +# which is slower than if this were all constant. +# The above was not implemented at the time this test was written. +values = { + "value1": a + 1, # This is a binary op not consant + "value2": 2, + "value3": 3, + "value4": 4, + "value5": 5, + "value6": 6, + "value7": 7, + "value8": 8, + "value9": 9, + "value10": 10, + "value11": 11, + "value12": 12, + "value13": 13, + "value14": 14, + "value15": 15, + "value16": 16, + "value17": 17, + "value18": 18, + "value19": 19, + "value20": 20, + "value21": 21, + "value22": 22, + "value23": 23, + "value24": 24, + "value25": 25, + "value26": 26, + "value27": 27, + "value28": 28, + "value29": 29, + "value30": 30, + "value31": 31, + "value32": 32, + "value33": 33, +} + +assert len(values.values()) == 33 diff --git a/test/simple_source/looping/04_while1_while1.py b/test/simple_source/looping/04_while1_while1.py index 41106b0c9..bb8762ccb 100644 --- a/test/simple_source/looping/04_while1_while1.py +++ b/test/simple_source/looping/04_while1_while1.py @@ -27,7 +27,7 @@ else: raise RuntimeError -# Degenerate case. Note: we can't run becase this causes an infinite loop. +# Degenerate case. Note: we can't run because this causes an infinite loop. 
# Suggested in issue #172 while 1: pass diff --git a/test/simple_source/looping/08_while1_if_continue.py b/test/simple_source/looping/08_while1_if_continue.py index de4dd7582..2adc0b4cf 100644 --- a/test/simple_source/looping/08_while1_if_continue.py +++ b/test/simple_source/looping/08_while1_if_continue.py @@ -19,7 +19,7 @@ def _parse(a, b, source, state): else: raise -def _parse2(source, state): +def _parse2(source, state, a, b, this): while 1: if a: if b: @@ -32,10 +32,10 @@ def _parse2(source, state): if b: break - x = 3 + x = this # Bug was in 2.3 decompilation -def _parse3(source, state): +def _parse3(source, state, a, b): while 1: if a: if b: diff --git a/test/simple_source/operation_logic/05_control_flow_bugs.py b/test/simple_source/operation_logic/05_control_flow_bugs.py index f756f2ade..5d89100dc 100644 --- a/test/simple_source/operation_logic/05_control_flow_bugs.py +++ b/test/simple_source/operation_logic/05_control_flow_bugs.py @@ -1,7 +1,8 @@ # From 3.6.10 test_binascii.py -# Bug was getting "while c and noise" parsed correclty +# Bug was getting "while c and noise" parsed correctly # and not put into the "ifelsesmt" + # RUNNABLE! def addnoise(c, noise): while c and noise: @@ -12,6 +13,7 @@ def addnoise(c, noise): noise = False return c + assert addnoise(0, True) == 0 assert addnoise(1, False) == 1 assert addnoise(2, True) == 2 @@ -19,9 +21,10 @@ def addnoise(c, noise): assert addnoise(4, True) == 3 assert addnoise(5, False) == 5 + # From 3.6.10 test_dbm_dumb.py # Bug was getting attaching "else" to the right "if" in the -# presense of a loop. +# presence of a loop. 
def test_random(a, r): x = 0 for dummy in r: @@ -32,11 +35,13 @@ def test_random(a, r): x += 1 return x + assert test_random(True, [1]) == 2 assert test_random(True, [1, 1]) == 4 assert test_random(False, [1]) == 0 assert test_random(False, [1, 1]) == 0 + # From 2.7.17 test_frozen.py # Bug was getting making sure we have "try" not # "try"/"else" @@ -53,11 +58,13 @@ def test_frozen(a, b): return x + assert test_frozen(1, 1) == 4.0 assert test_frozen(0, 1) == 5.0 assert test_frozen(0.5, 0) == 6.0 assert test_frozen(0, 0.5) == 8.0 + # From 3.6.10 test_binop.py # Bug was getting "other += 3" outside of "if"/"else. def __floordiv__(a, b): @@ -70,6 +77,7 @@ def __floordiv__(a, b): other += 3 return other + assert __floordiv__(True, True) == 4 assert __floordiv__(True, False) == 4 assert __floordiv__(False, True) == 3 diff --git a/test/simple_source/operation_logic/10_mixed_boolean.py b/test/simple_source/operation_logic/10_mixed_boolean.py index d1849cda0..35d64bef7 100644 --- a/test/simple_source/operation_logic/10_mixed_boolean.py +++ b/test/simple_source/operation_logic/10_mixed_boolean.py @@ -1,19 +1,19 @@ # Self-checking test. -# Mixed boolean expresions +# Mixed boolean expressions b = True -assert b, 'b = True' +assert b, "b = True" c = False -assert not c, 'c = False' +assert not c, "c = False" d = True a = b and c or d -assert a, 'b and c or d' +assert a, "b and c or d" a = (b or c) and d -assert a, '(b or c) and d' +assert a, "(b or c) and d" a = b or c or d -assert a, 'b or c or d' +assert a, "b or c or d" a = b and c and d -assert not a, 'b and c and d' +assert not a, "b and c and d" a = b or c and d assert a a = b and (c or d) diff --git a/test/simple_source/stmts/01_rel_import.py b/test/simple_source/stmts/01_rel_import.py index 77293b597..1dafdc546 100644 --- a/test/simple_source/stmts/01_rel_import.py +++ b/test/simple_source/stmts/01_rel_import.py @@ -1,2 +1,4 @@ # Tests relative imports from . import bogus +from .. 
import foo +from ..bar import baz diff --git a/test/simple_source/stmts/03_if_elif.py b/test/simple_source/stmts/03_if_elif.py index 615a1b723..df51f39c5 100644 --- a/test/simple_source/stmts/03_if_elif.py +++ b/test/simple_source/stmts/03_if_elif.py @@ -1,5 +1,5 @@ # 2.6.9 symbols.py -# Bug in 2.6 is having multple COME_FROMs due to the +# Bug in 2.6 is having multiple COME_FROMs due to the # "and" in the "if" clause # RUNNABLE @@ -10,7 +10,7 @@ assert False # 2.6.9 transformer.py -# Bug in 2.6 is multple COME_FROMs as a result +# Bug in 2.6 is multiple COME_FROMs as a result # of the "or" in the "assert" # In PyPy the assert is handled via PyPy's unique JUMP_IF_NOT_DEBUG @@ -24,6 +24,7 @@ else: pass + # From 3.3.7 test_binop.py # Bug was in ifelsestmt(c) ensuring b+=5 is not in "else" # Also note: ifelsetmtc should not have been used since this @@ -36,6 +37,7 @@ def __floordiv__(a, b): b += 5 return b + assert __floordiv__(1, 1) == 7 assert __floordiv__(1, 0) == 6 assert __floordiv__(0, 3) == 8 diff --git a/test/simple_source/stmts/08_test_contextmanager.py b/test/simple_source/stmts/08_test_contextmanager.py new file mode 100644 index 000000000..9a6ea5f97 --- /dev/null +++ b/test/simple_source/stmts/08_test_contextmanager.py @@ -0,0 +1,21 @@ +""" +This program is self checking! 
+""" + + +class TestContextManager: + def __enter__(self): + return 1, 2 + + def __exit__(self, exc_type, exc_value, exc_tb): + return self, exc_type, exc_value, exc_tb + + +with open(__file__) as a: + assert a + +with open(__file__) as a, open(__file__) as b: + assert a.read() == b.read() + +with TestContextManager() as a, b: + assert (a, b) == (1, 2) diff --git a/test/simple_source/stmts/11_return_val.py b/test/simple_source/stmts/11_return_val.py index c5076423f..fc13d53b9 100644 --- a/test/simple_source/stmts/11_return_val.py +++ b/test/simple_source/stmts/11_return_val.py @@ -1,6 +1,6 @@ # 2.5.6 decimal.py # Bug on 2.5 and 2.6 by incorrectly changing opcode to -# RETURN_VALUE to psuedo op: RETURN_END_IF +# RETURN_VALUE to pseudo op: RETURN_END_IF def _formatparam(param, value=None, quote=True): if value is not None and len(value) > 0: if isinstance(value, tuple): diff --git a/test/simple_source/stmts/16_bytestring_docstring.py b/test/simple_source/stmts/16_bytestring_docstring.py new file mode 100644 index 000000000..ab700011c --- /dev/null +++ b/test/simple_source/stmts/16_bytestring_docstring.py @@ -0,0 +1,45 @@ +"""Module docstring""" +class A: + b"""Got \xe7\xfe Bytes?""" + assert __doc__ == b"""Got \xe7\xfe Bytes?""" + + def class_func(self): + b"""Got \xe7\xfe Bytes?""" + assert __doc__ == """Module docstring""" + +class B: + """Got no Bytes?""" + assert __doc__ == """Got no Bytes?""" + + def class_func(self): + """Got no Bytes?""" + assert __doc__ == """Module docstring""" + +def single_func(): + """single docstring?""" + assert __doc__ == """Module docstring""" + +def single_byte_func(): + b"""Got \xe7\xfe Bytes?""" + assert __doc__ == """Module docstring""" + +assert __doc__ == """Module docstring""" + +assert single_func.__doc__ == """single docstring?""" +single_func() + +assert single_byte_func.__doc__ == b"""Got \xe7\xfe Bytes?""" +single_byte_func() + +assert A.__doc__ == b"""Got \xe7\xfe Bytes?""" +assert A.class_func.__doc__ == b"""Got \xe7\xfe 
Bytes?""" +a = A() +assert a.class_func.__doc__ == b"""Got \xe7\xfe Bytes?""" +a.class_func() + +assert B.__doc__ == """Got no Bytes?""" +assert B.class_func.__doc__ == """Got no Bytes?""" +b = B() +assert b.class_func.__doc__ == """Got no Bytes?""" +b.class_func() + diff --git a/test/simple_source/stmts/16_no_bytestring_docstring.py b/test/simple_source/stmts/16_no_bytestring_docstring.py new file mode 100644 index 000000000..be7167994 --- /dev/null +++ b/test/simple_source/stmts/16_no_bytestring_docstring.py @@ -0,0 +1,45 @@ +"""Module docstring""" +class A: + b"""Got \xe7\xfe Bytes?""" + assert __doc__ == """Module docstring""" + + def class_func(self): + b"""Got \xe7\xfe Bytes?""" + assert __doc__ == """Module docstring""" + +class B: + """Got no Bytes?""" + assert __doc__ == """Got no Bytes?""" + + def class_func(self): + """Got no Bytes?""" + assert __doc__ == """Module docstring""" + +def single_func(): + """single docstring?""" + assert __doc__ == """Module docstring""" + +def single_byte_func(): + b"""Got \xe7\xfe Bytes?""" + assert __doc__ == """Module docstring""" + +assert __doc__ == """Module docstring""" + +assert single_func.__doc__ == """single docstring?""" +single_func() + +assert single_byte_func.__doc__ is None +single_byte_func() + +assert A.__doc__ is None +assert A.class_func.__doc__ is None +a = A() +assert a.class_func.__doc__ is None +a.class_func() + +assert B.__doc__ == """Got no Bytes?""" +assert B.class_func.__doc__ == """Got no Bytes?""" +b = B() +assert b.class_func.__doc__ == """Got no Bytes?""" +b.class_func() + diff --git a/test/stdlib/2.4-exclude.sh b/test/stdlib/2.4-exclude.sh index a28d2cc68..3bb058ecd 100644 --- a/test/stdlib/2.4-exclude.sh +++ b/test/stdlib/2.4-exclude.sh @@ -38,17 +38,14 @@ SKIP_TESTS=( [test_winreg.py]=1 # it fails on its own [test_winsound.py]=1 # it fails on its own [test_zlib.py]=1 # it fails on its own - - [test_decimal.py]=1 # + [test_decimal.py]=1 # fails on its own - no module named test_support 
[test_dis.py]=1 # We change line numbers - duh! - [test_generators.py]=1 # Investigate - [test_grammar.py]=1 # Too many stmts. Handle large stmts + [test_generators.py]=1 # fails on its own - no module named test_support + # [test_grammar.py]=1 # fails on its own - no module tests.test_support [test_grp.py]=1 # Long test - might work Control flow? - [test_pep247.py]=1 # Long test - might work? Control flow? [test_socketserver.py]=1 # -- test takes too long to run: 40 seconds [test_threading.py]=1 # test takes too long to run: 11 seconds [test_thread.py]=1 # test takes too long to run: 36 seconds [test_trace.py]=1 # Long test - works - [test_zipfile64.py]=1 # Runs ok but takes 204 seconds ) # About 243 files, 0 in 19 minutes diff --git a/test/stdlib/2.5-exclude.sh b/test/stdlib/2.5-exclude.sh index 783e50c78..5dc11c569 100644 --- a/test/stdlib/2.5-exclude.sh +++ b/test/stdlib/2.5-exclude.sh @@ -25,7 +25,6 @@ SKIP_TESTS=( [test_nis.py]=1 # it fails on its own [test_normalization.py]=1 # it fails on its own [test_ossaudiodev.py]=1 # it fails on its own - [test_pep277.py]=1 # it fails on its own [test_plistlib.py]=1 # it fails on its own [test_rgbimg.py]=1 # it fails on its own [test_scriptpackages.py]=1 # it fails on its own @@ -44,7 +43,7 @@ SKIP_TESTS=( [test_dis.py]=1 # We change line numbers - duh! [test_file.py]=1 # test assertion failures [test_generators.py]=1 # Investigate - [test_grammar.py]=1 # Too many stmts. Handle large stmts + # [test_grammar.py]=1 # fails on its own - no module tests.test_support [test_grp.py]=1 # Long test - might work Control flow? [test_macfs.py]=1 # it fails on its own [test_macostools.py]=1 # it fails on its own diff --git a/test/stdlib/2.6-exclude.sh b/test/stdlib/2.6-exclude.sh index 298623268..952579a48 100644 --- a/test/stdlib/2.6-exclude.sh +++ b/test/stdlib/2.6-exclude.sh @@ -5,28 +5,26 @@ SKIP_TESTS=( # if filename == 'destfile': # return destfile # assert 0 # shouldn't reach here. 
- [test_shutil.py]=1 + # [test_shutil.py]=1 # OK but needs PYTHON=pytest - [test___all__.py]=1 # it fails on its own [test___all__.py]=1 # it fails on its own - [test_aepack.py]=1 # Fails on its own - [test_aepack.py]=1 # it fails on its own - [test_al.py]=1 # it fails on its own - [test_anydbm.py]=1 # it fails on its own - [test_applesingle.py]=1 # it fails on its own - + [test_aepack.py]=1 # No module macostools + [test_al.py]=1 # No module macostools + [test_anydbm.py]=pytest + [test_applesingle.py]=1 # No module macostools [test_bsddb185.py]=1 # it fails on its own [test_bsddb3.py]=1 # it fails on its own - [test_bsddb.py]=1 # it fails on its own + [test_bsddb.py]=1 # No module _bsdb - [test_cd.py]=1 # it fails on its own + [test_cd.py]=1 # i# No module cl [test_cl.py]=1 # it fails on its own + [test_cmath.py]=pytest [test_codecmaps_cn.py]=1 # it fails on its own [test_codecmaps_jp.py]=1 # it fails on its own [test_codecmaps_kr.py]=1 # it fails on its own [test_codecmaps_tw.py]=1 # it fails on its own [test_commands.py]=1 # it fails on its own - [test_curses.py]=1 # it fails on its own + [test_curses.py]=1 # needs libncurses.so.5 [test_dbm.py]=1 # it fails on its own [test_descr.py]=1 @@ -35,16 +33,16 @@ SKIP_TESTS=( [test_dl.py]=1 # it fails on its own [test_file.py]=1 # it fails on its own - [test_future5.py]=1 # it fails on its own + [test_future5.py]=pytest - [test_generators.py]=1 # Investigate + [test_generators.py]=pytest [test_gl.py]=1 # it fails on its own - [test_grp.py]=1 # Long test - might work Control flow? 
+ [test_grp.py]=pytest [test_imageop.py]=1 # it fails on its own [test_imaplib.py]=1 # it fails on its own [test_imgfile.py]=1 # it fails on its own - [test_ioctl.py]=1 # it fails on its own + [test_ioctl.py]=pytest [test_kqueue.py]=1 # it fails on its own @@ -54,17 +52,18 @@ SKIP_TESTS=( [test_macostools.py]=1 # it fails on its own [test_mailbox.py]=1 # FIXME: release 3.6.2 may have worked - [test_normalization.py]=1 # it fails on its own + # [test_normalization.py]=1 # it fails on its own [test_ossaudiodev.py]=1 # it fails on its own [test_pep277.py]=1 # it fails on its own - [test_pyclbr.py]=1 # Investigate + [test_pyclbr.py]=1 # it fails on its own [test_py3kwarn.py]=1 # it fails on its own [test_scriptpackages.py]=1 # it fails on its own [test_select.py]=1 # test takes too long to run: 11 seconds + [test_signal.py]=1 # takes more than 15 seconds to run [test_socket.py]=1 # test takes too long to run: 12 seconds [test_startfile.py]=1 # it fails on its own [test_structmembers.py]=1 # it fails on its own @@ -83,9 +82,8 @@ SKIP_TESTS=( [test_winreg.py]=1 # it fails on its own [test_winsound.py]=1 # it fails on its own - [test_zipimport_support.py]=1 - [test_zipfile64.py]=1 # Skip Long test - [test_zlib.py]=1 # + [test_zipimport_support.py]=pytest # expected test to raise ImportError + [test_zipfile.py]=pytest # Skip Long test # .pyenv/versions/2.6.9/lib/python2.6/lib2to3/refactor.pyc # .pyenv/versions/2.6.9/lib/python2.6/pyclbr.pyc ) diff --git a/test/stdlib/2.7-exclude.sh b/test/stdlib/2.7-exclude.sh index 296cd679a..89890d1c5 100644 --- a/test/stdlib/2.7-exclude.sh +++ b/test/stdlib/2.7-exclude.sh @@ -1,38 +1,41 @@ SKIP_TESTS=( - [test_cgi.py]=1 # FIXME: Works on c90ff51 + # raise ValueError("str arguments must be keys in sys.modules") + # ValueError: str arguments must be keys in sys.modules + [test_collections.py]=1 + [test_asyncore.py]=1 + [test_bdb.py]=1 + [test_bisect.py]=1 [test_bsddb3.py]=1 # test takes too long to run: 110 seconds + [test_coercion.py]=1 
# Code introspects on co_consts in a non-decompilable way [test_compile.py]=1 # Code introspects on co_consts in a non-decompilable way + [test_complex.py]=1 [test_curses.py]=1 # Possibly fails on its own but not detected [test_cmd_line.py]=1 # Takes too long, maybe hangs, or looking for interactive input? + [test_datetime.py]=1 + [test_decimal.py]=1 + [test_deque.py]=1 + [test_descr.py]=1 + [test_dictcomps.py]=1 [test_dis.py]=1 # We change line numbers - duh! [test_doctest.py]=1 # Fails on its own - [test_exceptions.py]=1 + [test_doctest2.py]=1 # Fails on its own + [test_format.py]=1 # Control flow "and" vs nested "if" - [test_grammar.py]=1 # Too many stmts. Handle large stmts - [test_grp.py]=1 # test takes to long, works interactively though [test_io.py]=1 # Test takes too long to run - [test_ioctl.py]=1 # Test takes too long to run - [test_lib2to3.py]=1 # test takes too long to run: 28 seconds - [test_math.py]=1 [test_memoryio.py]=1 # FIX - [test_modulefinder.py]=1 # FIX [test_multiprocessing.py]=1 # On uncompyle2, takes 24 secs - [test_poll.py]=1 # test takes too long to run: 11 seconds [test_regrtest.py]=1 # [test_runpy.py]=1 # Long and fails on its own - [test_select.py]=1 # Runs okay but takes 11 seconds [test_socket.py]=1 # Runs ok but takes 22 seconds - [test_ssl.py]=1 # + [test_ssl.py]=1 # Fails on its own [test_subprocess.py]=1 # Runs ok but takes 22 seconds [test_sys_settrace.py]=1 # Line numbers are expected to be different - [test_tokenize.py]=1 # test takes too long to run: 19 seconds [test_traceback.py]=1 # Line numbers change - duh. 
- [test_unicode.py]=1 # Too long to run 11 seconds [test_xpickle.py]=1 # Runs ok but takes 72 seconds [test_zipfile64.py]=1 # Runs ok but takes 204 seconds - [test_zipimport.py]=1 # + [test_zipimport.py]=1 # expected test to raise ImportError ) # 334 unit-test files in about 15 minutes diff --git a/test/stdlib/3.2-exclude.sh b/test/stdlib/3.2-exclude.sh index 5b61dd9b7..87aec9aa5 100644 --- a/test/stdlib/3.2-exclude.sh +++ b/test/stdlib/3.2-exclude.sh @@ -1,15 +1,42 @@ SKIP_TESTS=( - [test_descr.py]=1 # FIXME: Works on c90ff51? + [test_descr.py]=1 + # [test_descr.py]=pytest_module # FIXME: Works on c90ff51? + # AssertionError: 'D(4)C(4)A(4)' != 'D(4)C(4)B(4)A(4)' + # - D(4)C(4)A(4) + # + D(4)C(4)B(4)A(4) + # ? ++++ + + + [test_cmath.py]=1 # Control-flow "elif else -> else: if else" + # [test_cmath.py]=pytest_module + # AssertionError: rect1000: rect(complex(0.0, 0.0)) + # Expected: complex(0.0, 0.0) + # Received: complex(0.0, -1.0) + # Received value insufficiently close to expected value. + [test_cmd_line.py]=1 - [test_collections.py]=1 + + [test_collections.py]=1 # fail on its own + # E TypeError: __new__() takes exactly 4 arguments (1 given) + [test_concurrent_futures.py]=1 # too long to run over 46 seconds by itself - [test_datetimetester.py]=1 - [test_decimal.py]=1 - [test_dictcomps.py]=1 # FIXME: semantic error: actual = {k:v for k in } - [test_doctest.py]=1 # test failures + [test_datetime.py]=pytest_module + + [test_decimal.py]=1 # Fails on its own, even with pytest + + [test_dictcomps.py]=1 + # [test_dictcomps.py]=pytest_module # FIXME: semantic error: actual = {k:v for k in } + # assert (count * 2) <= i + + [test_doctest.py]=1 # Missing pytest fixture + # [test_doctest.py]=pytest_module + # fixture 'coverdir' not found + [test_dis.py]=1 # We change line numbers - duh! 
+ [test_exceptions.py]=1 # parse error + # [test_exceptions.py]=pytest_module # parse error [test_modulefinder.py]=1 # test failures [test_multiprocessing.py]=1 # test takes too long to run: 35 seconds diff --git a/test/stdlib/3.3-exclude.sh b/test/stdlib/3.3-exclude.sh index ea0efb294..170e25e8d 100644 --- a/test/stdlib/3.3-exclude.sh +++ b/test/stdlib/3.3-exclude.sh @@ -10,8 +10,8 @@ SKIP_TESTS=( # tgt.append(elem) [test_itertools.py]=1 - [test_buffer.py]=1 # FIXME: Works on c90ff51 - [test_cmath.py]=1 # FIXME: Works on c90ff51 + [test_buffer.py]=pytest + [test_cmath.py]=pytest [test_atexit.py]=1 # The atexit test starting at 3.3 looks for specific comments in error lines @@ -19,7 +19,6 @@ SKIP_TESTS=( [test_concurrent_futures.py]=1 # too long? [test_decimal.py]=1 # test takes too long to run: 18 seconds - [test_descr.py]=1 # test assertion errors [test_doctest.py]=1 # test assertion errors [test_doctest2.py]=1 # test assertion errors [test_dis.py]=1 # We change line numbers - duh! diff --git a/test/stdlib/3.4-exclude.sh b/test/stdlib/3.4-exclude.sh index 641cf906a..b398b8e99 100644 --- a/test/stdlib/3.4-exclude.sh +++ b/test/stdlib/3.4-exclude.sh @@ -26,7 +26,16 @@ SKIP_TESTS=( [test_dbm_gnu.py]=1 # fails on its own [test_devpoll.py]=1 # it fails on its own + [test_descr.py]=1 # test assertion errors + # ERROR: test_reent_set_bases_on_base (__main__.MroTest) + # Traceback (most recent call last): + # File "test_descr.py", line 5521, in test_reent_set_bases_on_base + # class A(metaclass=M): + # File "test_descr.py", line 5472, in __new__ + # return type.__new__(mcls, name, bases, attrs) + # TypeError: 'NoneType' object is not iterable + [test_dis.py]=1 # We change line numbers - duh! 
[test_distutils.py]=1 # it fails on its own [test_doctest2.py]=1 diff --git a/test/stdlib/3.6-exclude.sh b/test/stdlib/3.6-exclude.sh index 13aa30974..e7ae174f4 100644 --- a/test/stdlib/3.6-exclude.sh +++ b/test/stdlib/3.6-exclude.sh @@ -1,6 +1,6 @@ SKIP_TESTS=( [test_ast.py]=1 # FIXME: Works on c90ff51 - [test_cmath.py]=1 # FIXME: Works on c90ff51 + [test_cmath.py]=1 # fails on its own [test_format.py]=1 # FIXME: Works on c90ff51 [test_ftplib.py]=1 # FIXME: Works on c90ff51 [test_slice.py]=1 # FIXME: Works on c90ff51 @@ -12,7 +12,6 @@ SKIP_TESTS=( [test_aifc.py]=1 # [test_argparse.py]=1 # it fails on its own [test_asdl_parser.py]=1 # it fails on its own - [test_asyncgen.py]=1 # parse error [test_atexit.py]=1 # The atexit test looks for specific comments in error lines [test_baseexception.py]=1 # test assert error @@ -39,9 +38,9 @@ SKIP_TESTS=( [test_collections.py]= # it fails on its own [test_compile.py]=1 # Code introspects on co_consts in a non-decompilable way [test_concurrent_futures.py]=1 # Takes long - [test_contextlib.py]=1 # test assertion failure - [test_contextlib_async.py]=1 # Investigate - [test_coroutines.py]=1 # parse error + + # [test_coroutines.py]=1 # FIXME: async parse error + [test_curses.py]=1 # Parse error [test_ctypes.py]=1 # it fails on its own diff --git a/test/stdlib/3.7-exclude.sh b/test/stdlib/3.7-exclude.sh index f17247b28..08462fa44 100644 --- a/test/stdlib/3.7-exclude.sh +++ b/test/stdlib/3.7-exclude.sh @@ -14,22 +14,19 @@ SKIP_TESTS=( # complicated control flow and "and/or" expressions [test_pickle.py]=1 - [test_builtin.py]=1 # FIXME works on decompyle6 [test_context.py]=1 # FIXME works on decompyle6 - [test_doctest2.py]=1 # FIXME works on decompyle6 [test_format.py]=1 # FIXME works on decompyle6 [test_marshal.py]=1 # FIXME works on decompyle6 [test_normalization.py]=1 # FIXME works on decompyle6 [test_os.py]=1 # FIXME works on decompyle6 - [test_pow.py]=1 # FIXME works on decompyle6 [test_slice.py]=1 # FIXME works on decompyle6 
[test_sort.py]=1 # FIXME works on decompyle6 [test_statistics.py]=1 # FIXME works on decompyle6 [test_timeit.py]=1 # FIXME works on decompyle6 [test_urllib2_localnet.py]=1 # FIXME works on decompyle6 [test_urllib2.py]=1 # FIXME: works on uncompyle6 - [test_generators.py]=1 # FIXME: works on uncompyle6 - lambda parsing probably - [test_grammar.py]=1 # FIXME: works on uncompyle6 - lambda parsing probably + [test_generators.py]=1 # File "test_generators.py", line 44, in test_raise_and_yield_from self.assertEqual(exc.value, 'PASSED') + [test_grammar.py]=1 # FIXME: invalid syntax: l4 = lambda x=lambda y=lambda z=1: z: y(): x() [test___all__.py]=1 # it fails on its own [test_argparse.py]=1 #- it fails on its own @@ -39,7 +36,7 @@ SKIP_TESTS=( [test_bdb.py]=1 # [test_buffer.py]=1 # parse error [test_clinic.py]=1 # it fails on its own - [test_cmath.py]=1 # test assertion failure + [test_cmath.py]=pytest [test_cmd_line.py]=1 # Interactive? [test_cmd_line_script.py]=1 [test_compileall.py]=1 # fails on its own @@ -130,7 +127,6 @@ SKIP_TESTS=( [test_traceback.py]=1 # Probably uses comment for testing [test_tracemalloc.py]=1 # test assert failres [test_ttk_guionly.py]=1 # implementation specfic and test takes too long to run: 19 seconds - [test_ttk_guionly.py]=1 # implementation specfic and test takes too long to run: 19 seconds [test_typing.py]=1 # parse error [test_types.py]=1 # parse error diff --git a/test/stdlib/3.8-exclude.sh b/test/stdlib/3.8-exclude.sh index d8e87b4c9..61f8700cd 100644 --- a/test/stdlib/3.8-exclude.sh +++ b/test/stdlib/3.8-exclude.sh @@ -28,12 +28,12 @@ SKIP_TESTS=( # These and the above may be due to new code generation or tests # between 3.8.3 and 3.8.5 ? 
- [test_decorators.py]=1 # + [test_decorators.py]=1 # parse error - [test_dtrace.py]=1 # - [test_exceptions.py]=1 # + [test_dtrace.py]=1 # parse error + [test_exceptions.py]=1 # parse error [test_ftplib.py]=1 # - [test_gc.py]=1 # + [test_gc.py]=1 # FIXME: return return strip_python_stderr(stderr) [test_gzip.py]=1 # [test_hashlib.py]=1 # [test_iter.py]=1 # @@ -51,7 +51,6 @@ SKIP_TESTS=( [test_audioop.py]=1 # test failure [test_audit.py]=1 # parse error - [test_base64.py]=1 # parse error [test_baseexception.py]=1 # [test_bigaddrspace.py]=1 # parse error [test_bigmem.py]=1 # parse error @@ -69,7 +68,7 @@ SKIP_TESTS=( [test_cgi.py]=1 # parse error [test_cgitb.py]=1 # parse error [test_clinic.py]=1 # it fails on its own - [test_cmath.py]=1 # test assertion failure + [test_cmath.py]=pytest [test_cmd.py]=1 # parse error [test_cmd_line.py]=1 # Interactive? [test_cmd_line_script.py]=1 diff --git a/test/stdlib/runtests.sh b/test/stdlib/runtests.sh index 1b3e579df..22719c44d 100755 --- a/test/stdlib/runtests.sh +++ b/test/stdlib/runtests.sh @@ -1,6 +1,9 @@ #!/bin/bash me=${BASH_SOURCE[0]} +# Note: for 2.6 sometimes we need to set PYTHON=pytest +PYTHON=${PYTHON:-python} + typeset -i BATCH=${BATCH:-0} if (( ! BATCH )) ; then isatty=$(/usr/bin/tty 2>/dev/null) @@ -26,7 +29,19 @@ function displaytime { # Python version setup FULLVERSION=$(pyenv local) PYVERSION=${FULLVERSION%.*} -MINOR=${FULLVERSION##?.?.} + +if [[ $PYVERSION =~ 'pypy' ]] ; then + IS_PYPY=1 +else + IS_PYPY=0 +fi + +if [[ $FULLVERSION =~ pypy([2-3])\.([7-9]) ]]; then + MAJOR="${BASH_REMATCH[1]}" + MINOR="${BASH_REMATCH[2]}" +else + MINOR=${FULLVERSION##?.?.} +fi STOP_ONERROR=${STOP_ONERROR:-1} @@ -147,13 +162,14 @@ done mkdir $TESTDIR || exit $? 
-cp -r ${PYENV_ROOT}/versions/${PYVERSION}.${MINOR}/lib/python${PYVERSION}/test $TESTDIR -if [[ $PYVERSION == 3.2 ]] ; then - cp ${PYENV_ROOT}/versions/${PYVERSION}.${MINOR}/lib/python${PYVERSION}/test/* $TESTDIR - cd $TESTDIR + +if ((IS_PYPY)); then + cp -r ${PYENV_ROOT}/versions/${PYVERSION}.${MINOR}/lib-python/${MAJOR}/test $TESTDIR else - cd $TESTDIR/test + cp -r ${PYENV_ROOT}/versions/${PYVERSION}.${MINOR}/lib/python${PYVERSION}/test $TESTDIR fi +cd $TESTDIR/test + pyenv local $FULLVERSION export PYTHONPATH=$TESTDIR export PATH=${PYENV_ROOT}/shims:${PATH} @@ -167,7 +183,11 @@ if [[ -n $1 ]] ; then files=$@ typeset -a files_ary=( $(echo $@) ) if (( ${#files_ary[@]} == 1 || DONT_SKIP_TESTS == 1 )) ; then - SKIP_TESTS=() + for file in $files; do + if (( SKIP_TESTS[$file] != "pytest" || SKIP_TESTS[$file] != "pytest_module" )); then + SKIP_TESTS[$file]=1; + fi + done fi else files=$(echo test_*.py) @@ -181,9 +201,16 @@ NOT_INVERTED_TESTS=${NOT_INVERTED_TESTS:-1} for file in $files; do # AIX bash doesn't grok [[ -v SKIP... ]] [[ -z ${SKIP_TESTS[$file]} ]] && SKIP_TESTS[$file]=0 - if [[ ${SKIP_TESTS[$file]} == ${NOT_INVERTED_TESTS} ]] ; then - ((skipped++)) - continue + + if [[ ${SKIP_TESTS[$file]} == "pytest" ]]; then + PYTHON=pytest + elif [[ ${SKIP_TESTS[$file]} == "pytest_module" ]]; then + PYTHON="$PYTHON -m pytest" + else + if [[ ${SKIP_TESTS[$file]}s == ${NOT_INVERTED_TESTS} ]] ; then + ((skipped++)) + continue + fi fi # If the fails *before* decompiling, skip it! @@ -191,7 +218,7 @@ for file in $files; do if [ ! -r $file ]; then echo "Skipping test $file -- not readable. Does it exist?" continue - elif ! python $file >/dev/null 2>&1 ; then + elif ! 
$PYTHON $file >/dev/null 2>&1 ; then echo "Skipping test $file -- it fails on its own" continue fi @@ -206,7 +233,11 @@ for file in $files; do ((i++)) # (( i > 40 )) && break short_name=$(basename $file .py) - decompiled_file=$short_name-${PYVERSION}.pyc + if ((IS_PYPY)); then + decompiled_file=$short_name-${MAJOR}.${MINOR}.pyc + else + decompiled_file=$short_name-${PYVERSION}.pyc + fi $fulldir/compile-file.py $file && \ mv $file{,.orig} && \ echo ========== $(date +%X) Decompiling $file =========== @@ -214,7 +245,7 @@ for file in $files; do rc=$? if (( rc == 0 )) ; then echo ========== $(date +%X) Running $file =========== - timeout_cmd python $file + timeout_cmd $PYTHON $file rc=$? else echo ======= Skipping $file due to compile/decompile errors ======== diff --git a/test/test_pyenvlib.py b/test/test_pyenvlib.py index 7b76e5edc..59ac9cede 100755 --- a/test/test_pyenvlib.py +++ b/test/test_pyenvlib.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # emacs-mode: -*-python-*- """ -test_pyenvlib -- uncompyle and verify Python libraries +test_pyenvlib -- decompile and verify Python libraries Usage-Examples: @@ -20,14 +20,19 @@ test_pyenvlib --mylib --verify # decompile verify 'mylib' """ -from __future__ import print_function - -import os, time, re, shutil, sys +# Does not work on 2.5.9 or before +# from __future__ import print_function +import os +import re +import shutil +import sys +import time from fnmatch import fnmatch -from uncompyle6 import main import xdis.magics as magics +from uncompyle6 import main + # ----- configure this for your needs python_versions = [v for v in magics.python_versions if re.match("^[0-9.]+$", v)] @@ -82,6 +87,7 @@ if vers == "native": short_vers = os.path.basename(sys.path[-1]) from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str + if PYTHON_VERSION_TRIPLE > (3, 0): version = version_tuple_to_str(end=2) PYC = f"*.cpython-{version}.pyc" @@ -133,8 +139,17 @@ def visitor(files, dirname, names): pass if len(files) > max_files: 
- files = [file for file in files if not "site-packages" in file and (file.endswith(".pyo") or file.endswith(".pyc"))] - files = [file for file in files if not "test" in file and (file.endswith(".pyo") or file.endswith(".pyc"))] + files = [ + file + for file in files + if not "site-packages" in file + and (file.endswith(".pyo") or file.endswith(".pyc")) + ] + files = [ + file + for file in files + if not "test" in file and (file.endswith(".pyo") or file.endswith(".pyc")) + ] if len(files) > max_files: # print("Number of files %d - truncating to last 200" % len(files)) print( @@ -151,7 +166,8 @@ def visitor(files, dirname, names): if __name__ == "__main__": - import getopt, sys + import getopt + import sys do_coverage = do_verify = False test_dirs = [] diff --git a/test/test_pythonlib.py b/test/test_pythonlib.py index cbef614fb..99d2f90b3 100755 --- a/test/test_pythonlib.py +++ b/test/test_pythonlib.py @@ -29,11 +29,18 @@ from __future__ import print_function -import getopt, os, py_compile, sys, shutil, tempfile, time - +import getopt +import os +import py_compile +import shutil +import sys +import tempfile +import time from fnmatch import fnmatch + +from xdis.version_info import PYTHON_VERSION_TRIPLE + from uncompyle6.main import main -from xdis.version_info import PYTHON_VERSION def get_srcdir(): @@ -164,10 +171,10 @@ def file_matches(files, root, basenames, patterns): if opts["do_compile"]: compiled_version = opts["compiled_version"] - if compiled_version and PYTHON_VERSION != compiled_version: + if compiled_version and PYTHON_VERSION_TRIPLE != compiled_version: print( "Not compiling: desired Python version is %s but we are running %s" - % (compiled_version, PYTHON_VERSION), + % (compiled_version, PYTHON_VERSION_TRIPLE), file=sys.stderr, ) else: @@ -209,7 +216,7 @@ def file_matches(files, root, basenames, patterns): print("Output directory: ", target_dir) try: _, _, failed_files, failed_verify = main( - src_dir, target_dir, files, [], do_verify=opts["do_verify"] 
+ src_dir, target_dir, files, [] ) if failed_files != 0: sys.exit(2) diff --git a/uncompyle6/__init__.py b/uncompyle6/__init__.py index 21e5062c4..1be71c17d 100644 --- a/uncompyle6/__init__.py +++ b/uncompyle6/__init__.py @@ -1,5 +1,5 @@ """ - Copyright (c) 2015, 2018, 2021-2022 by Rocky Bernstein + Copyright (c) 2015, 2018, 2021-2022, 2025 by Rocky Bernstein Copyright (c) 2000 by hartmut Goebel Copyright (c) 1999 John Aycock @@ -36,21 +36,17 @@ # pyston doesn't have setrecursionlimit sys.setrecursionlimit(5000) -from uncompyle6.semantics import fragments, pysource - # Export some functions from uncompyle6.main import decompile_file # noqa +from uncompyle6.semantics.pysource import code_deparse, deparse_code2str # Convenience functions so you can say: # from uncompyle6 import (code_deparse, deparse_code2str) -from uncompyle6.semantics.pysource import code_deparse, deparse_code2str __all__ = [ "__version__", "code_deparse", "decompile_file", "deparse_code2str", - "fragments", - "pysource", ] diff --git a/uncompyle6/bin/pydisassemble.py b/uncompyle6/bin/pydisassemble.py index 9f43f7af3..e40e0dfc4 100755 --- a/uncompyle6/bin/pydisassemble.py +++ b/uncompyle6/bin/pydisassemble.py @@ -1,12 +1,27 @@ #!/usr/bin/env python -# Mode: -*- python -*- # -# Copyright (c) 2015-2016, 2018, 2020, 2022 by Rocky Bernstein +# Copyright (c) 2015-2016, 2018, 2020, 2022-2024 +# by Rocky Bernstein +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
If not, see . # -from __future__ import print_function -import sys, os, getopt -from uncompyle6.disas import disassemble_file +import getopt +import os +import sys + +from uncompyle6.code_fns import disassemble_file from uncompyle6.version import __version__ program, ext = os.path.splitext(os.path.basename(__file__)) @@ -20,7 +35,7 @@ assist uncompyle6 in parsing the instruction stream. For example instructions with variable-length arguments like CALL_FUNCTION and BUILD_LIST have argument counts appended to the instruction name, and -COME_FROM psuedo instructions are inserted into the instruction stream. +COME_FROM pseudo instructions are inserted into the instruction stream. Bit flag values encoded in an operand are expanding, EXTENDED_ARG value are folded into the following instruction operand. @@ -40,47 +55,53 @@ -V | --version show version and stop -h | --help show this message -""".format(program) +""".format( + program +) + +PATTERNS = ("*.pyc", "*.pyo") -PATTERNS = ('*.pyc', '*.pyo') def main(): - Usage_short = """usage: %s FILE... -Type -h for for full help.""" % program + usage_short = ( + f"""usage: {program} FILE... 
+Type -h for for full help.""" + ) if len(sys.argv) == 1: print("No file(s) given", file=sys.stderr) - print(Usage_short, file=sys.stderr) + print(usage_short, file=sys.stderr) sys.exit(1) try: - opts, files = getopt.getopt(sys.argv[1:], 'hVU', - ['help', 'version', 'uncompyle6']) + opts, files = getopt.getopt( + sys.argv[1:], "hVU", ["help", "version", "uncompyle6"] + ) except getopt.GetoptError as e: - print('%s: %s' % (os.path.basename(sys.argv[0]), e), file=sys.stderr) + print(f"{os.path.basename(sys.argv[0])}: {e}", file=sys.stderr) sys.exit(-1) for opt, val in opts: - if opt in ('-h', '--help'): + if opt in ("-h", "--help"): print(__doc__) sys.exit(1) - elif opt in ('-V', '--version'): - print("%s %s" % (program, __version__)) + elif opt in ("-V", "--version"): + print(f"{program} {__version__}") sys.exit(0) else: print(opt) - print(Usage_short, file=sys.stderr) + print(usage_short, file=sys.stderr) sys.exit(1) for file in files: if os.path.exists(files[0]): disassemble_file(file, sys.stdout) else: - print("Can't read %s - skipping" % files[0], - file=sys.stderr) + print(f"Can't read {files[0]} - skipping", file=sys.stderr) pass pass return -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/uncompyle6/bin/uncompile.py b/uncompyle6/bin/uncompile.py index 90f409001..3fc7fc332 100755 --- a/uncompyle6/bin/uncompile.py +++ b/uncompyle6/bin/uncompile.py @@ -1,170 +1,188 @@ #!/usr/bin/env python # Mode: -*- python -*- # -# Copyright (c) 2015-2017, 2019-2020 by Rocky Bernstein +# Copyright (c) 2015-2017, 2019-2020, 2023-2025 +# by Rocky Bernstein # Copyright (c) 2000-2002 by hartmut Goebel # from __future__ import print_function -import sys, os, getopt, time + +import os +import sys +import time +from typing import List + +import click from xdis.version_info import version_tuple_to_str -program = 'uncompyle6' - -__doc__ = """ -Usage: - %s [OPTIONS]... [ FILE | DIR]... 
- %s [--help | -h | --V | --version] - -Examples: - %s foo.pyc bar.pyc # decompile foo.pyc, bar.pyc to stdout - %s -o . foo.pyc bar.pyc # decompile to ./foo.pyc_dis and ./bar.pyc_dis - %s -o /tmp /usr/lib/python1.5 # decompile whole library - -Options: - -o output decompiled files to this path: - if multiple input files are decompiled, the common prefix - is stripped from these names and the remainder appended to - - uncompyle6 -o /tmp bla/fasel.pyc bla/foo.pyc - -> /tmp/fasel.pyc_dis, /tmp/foo.pyc_dis - uncompyle6 -o /tmp bla/fasel.pyc bar/foo.pyc - -> /tmp/bla/fasel.pyc_dis, /tmp/bar/foo.pyc_dis - uncompyle6 -o /tmp /usr/lib/python1.5 - -> /tmp/smtplib.pyc_dis ... /tmp/lib-tk/FixTk.pyc_dis - --compile | -c - attempts a decompilation after compiling - -d print timestamps - -p use number of processes - -r recurse directories looking for .pyc and .pyo files - --fragments use fragments deparser - --verify compare generated source with input byte-code - --verify-run compile generated source, run it and check exit code - --syntax-verify compile generated source - --linemaps generated line number correspondencies between byte-code - and generated source output - --encoding - use in generated source according to pep-0263 - --help show this message - -Debugging Options: - --asm | -a include byte-code (disables --verify) - --grammar | -g show matching grammar - --tree={before|after} - -t {before|after} include syntax before (or after) tree transformation - (disables --verify) - --tree++ | -T add template rules to --tree=before when possible - -Extensions of generated files: - '.pyc_dis' '.pyo_dis' successfully decompiled (and verified if --verify) - + '_unverified' successfully decompile but --verify failed - + '_failed' decompile failed (contact author for enhancement) -""" % ((program,) * 5) - -program = 'uncompyle6' - -from uncompyle6 import verify from uncompyle6.main import main, status_msg +from uncompyle6.verify import VerifyCmpError from uncompyle6.version import 
__version__ +program = "uncompyle6" + + def usage(): print(__doc__) sys.exit(1) -def main_bin(): - if not (sys.version_info[0:2] in ((2, 6), (2, 7), (3, 0), - (3, 1), (3, 2), (3, 3), - (3, 4), (3, 5), (3, 6), - (3, 7), (3, 8), (3, 9), (3, 10) - )): +# __doc__ = """ +# Usage: +# %s [OPTIONS]... [ FILE | DIR]... +# %s [--help | --version] + +# Examples: +# %s foo.pyc bar.pyc # decompile foo.pyc, bar.pyc to stdout +# %s -o . foo.pyc bar.pyc # decompile to ./foo.pyc_dis and ./bar.pyc_dis +# %s -o /tmp /usr/lib/python1.5 # decompile whole library + +# Options: +# -o output decompiled files to this path: +# if multiple input files are decompiled, the common prefix +# is stripped from these names and the remainder appended to +# +# uncompyle6 -o /tmp bla/fasel.pyc bla/foo.pyc +# -> /tmp/fasel.pyc_dis, /tmp/foo.pyc_dis +# uncompyle6 -o /tmp bla/fasel.pyc bar/foo.pyc +# -> /tmp/bla/fasel.pyc_dis, /tmp/bar/foo.pyc_dis +# uncompyle6 -o /tmp /usr/lib/python1.5 +# -> /tmp/smtplib.pyc_dis ... /tmp/lib-tk/FixTk.pyc_dis +# --compile | -c +# attempts a decompilation after compiling +# -d print timestamps +# -p use number of processes +# -r recurse directories looking for .pyc and .pyo files +# --fragments use fragments deparser +# --verify compare generated source with input byte-code +# --verify-run compile generated source, run it and check exit code +# --syntax-verify compile generated source +# --linemaps generated line number correspondencies between byte-code +# and generated source output +# --encoding +# use in generated source according to pep-0263 +# --help show this message + +# Debugging Options: +# --asm | -a include byte-code (disables --verify) +# --grammar | -g show matching grammar +# --tree={before|after} +# -t {before|after} include syntax before (or after) tree transformation +# (disables --verify) +# --tree++ | -T add template rules to --tree=before when possible + +# Extensions of generated files: +# '.pyc_dis' '.pyo_dis' successfully decompiled (and verified 
if --verify) +# + '_unverified' successfully decompile but --verify failed +# + '_failed' decompile failed (contact author for enhancement) +# """ % ( +# (program,) * 5 +# ) + + +@click.command(context_settings={"help_option_names": ["--help", "-help", "-h"]}) +@click.option( + "--asm++/--no-asm++", + "-A", + "asm_plus", + default=False, + help="show xdis assembler and tokenized assembler", +) +@click.option("--asm/--no-asm", "-a", default=False) +@click.option("--grammar/--no-grammar", "-g", "show_grammar", default=False) +@click.option("--tree/--no-tree", "-t", default=False) +@click.option( + "--tree++/--no-tree++", + "-T", + "tree_plus", + default=False, + help="show parse tree and Abstract Syntax Tree", +) +@click.option( + "--linemaps/--no-linemaps", + default=False, + help="show line number correspondencies between byte-code " + "and generated source output", +) +@click.option( + "--verify", + type=click.Choice(["run", "syntax"]), + default=None, +) +@click.option( + "--recurse/--no-recurse", + "-r", + "recurse_dirs", + default=False, +) +@click.option( + "--output", + "-o", + "outfile", + type=click.Path( + exists=True, file_okay=True, dir_okay=True, writable=True, resolve_path=True + ), + required=False, +) +@click.version_option(version=__version__) +@click.option( + "--start-offset", + "start_offset", + default=0, + help="start decomplation at offset; default is 0 or the starting offset.", +) +@click.option( + "--stop-offset", + "stop_offset", + default=-1, + help="stop decomplation when seeing an offset greater or equal to this; default is " + "-1 which indicates no stopping point.", +) +@click.argument("files", nargs=-1, type=click.Path(readable=True), required=True) +def main_bin( + asm: bool, + asm_plus: bool, + show_grammar, + tree: bool, + tree_plus: bool, + linemaps: bool, + verify, + recurse_dirs: bool, + outfile, + start_offset: int, + stop_offset: int, + files, +): + """ + Cross Python bytecode decompiler for Python bytecode up to Python 3.8. 
+ """ + + version_tuple = sys.version_info[0:2] + if version_tuple < (3, 6): print( - f"Error: {program} can decompile only bytecode from Python 3.7" - f""" to 3.8.\n\tYou have version: {version_tuple_to_str()}.""" + f"Error: This version of the {program} runs from Python 3.6 or greater." + f"You need another branch of this code for Python before 3.6." + f""" \n\tYou have version: {version_tuple_to_str()}.""" ) sys.exit(-1) - do_verify = recurse_dirs = False numproc = 0 - outfile = '-' out_base = None - source_paths = [] + + out_base = None + source_paths: List[str] = [] timestamp = False timestampfmt = "# %Y.%m.%d %H:%M:%S %Z" + pyc_paths = files - try: - opts, pyc_paths = getopt.getopt(sys.argv[1:], 'hac:gtTdrVo:p:', - 'help asm compile= grammar linemaps recurse ' - 'timestamp tree= tree+ ' - 'fragments verify verify-run version ' - 'syntax-verify ' - 'showgrammar encoding='.split(' ')) - except getopt.GetoptError as e: - print('%s: %s' % (os.path.basename(sys.argv[0]), e), file=sys.stderr) - sys.exit(-1) - - options = {} - for opt, val in opts: - if opt in ('-h', '--help'): - print(__doc__) - sys.exit(0) - elif opt in ('-V', '--version'): - print("%s %s" % (program, __version__)) - sys.exit(0) - elif opt == '--verify': - options['do_verify'] = 'strong' - elif opt == '--syntax-verify': - options['do_verify'] = 'weak' - elif opt == '--fragments': - options['do_fragments'] = True - elif opt == '--verify-run': - options['do_verify'] = 'verify-run' - elif opt == '--linemaps': - options['do_linemaps'] = True - elif opt in ('--asm', '-a'): - options['showasm'] = 'after' - options['do_verify'] = None - elif opt in ('--tree', '-t'): - if 'showast' not in options: - options['showast'] = {} - if val == 'before': - options['showast'][val] = True - elif val == 'after': - options['showast'][val] = True - else: - options['showast']['before'] = True - options['do_verify'] = None - elif opt in ('--tree+', '-T'): - if 'showast' not in options: - options['showast'] = {} - 
options['showast']['after'] = True - options['showast']['before'] = True - options['do_verify'] = None - elif opt in ('--grammar', '-g'): - options['showgrammar'] = True - elif opt == '-o': - outfile = val - elif opt in ('--timestamp', '-d'): - timestamp = True - elif opt in ('--compile', '-c'): - source_paths.append(val) - elif opt == '-p': - numproc = int(val) - elif opt in ('--recurse', '-r'): - recurse_dirs = True - elif opt == '--encoding': - options['source_encoding'] = val - else: - print(opt, file=sys.stderr) - usage() - - # expand directory if specified + # Expand directory if "recurse" was specified. if recurse_dirs: expanded_files = [] for f in pyc_paths: if os.path.isdir(f): for root, _, dir_files in os.walk(f): for df in dir_files: - if df.endswith('.pyc') or df.endswith('.pyo'): + if df.endswith(".pyc") or df.endswith(".pyo"): expanded_files.append(os.path.join(root, df)) pyc_paths = expanded_files @@ -175,38 +193,58 @@ def main_bin(): if src_base[-1:] != os.sep: src_base = os.path.dirname(src_base) if src_base: - sb_len = len( os.path.join(src_base, '') ) + sb_len = len(os.path.join(src_base, "")) pyc_paths = [f[sb_len:] for f in pyc_paths] if not pyc_paths and not source_paths: print("No input files given to decompile", file=sys.stderr) usage() - if outfile == '-': - outfile = None # use stdout + if outfile == "-": + outfile = None # use stdout elif outfile and os.path.isdir(outfile): - out_base = outfile; outfile = None + out_base = outfile + outfile = None elif outfile and len(pyc_paths) > 1: - out_base = outfile; outfile = None + out_base = outfile + outfile = None + + # A second -a turns show_asm="after" into show_asm="before" + if asm_plus or asm: + asm_opt = "both" if asm_plus else "after" + else: + asm_opt = None if timestamp: print(time.strftime(timestampfmt)) if numproc <= 1: + show_ast = {"before": tree or tree_plus, "after": tree_plus} try: - result = main(src_base, out_base, pyc_paths, source_paths, outfile, - **options) - result = 
list(result) + [options.get('do_verify', None)] + result = main( + src_base, + out_base, + pyc_paths, + source_paths, + outfile, + showasm=asm_opt, + showgrammar=show_grammar, + showast=show_ast, + do_verify=verify, + do_linemaps=linemaps, + start_offset=start_offset, + stop_offset=stop_offset, + ) if len(pyc_paths) > 1: - mess = status_msg(do_verify, *result) - print('# ' + mess) + mess = status_msg(*result) + print("# " + mess) pass except ImportError as e: print(str(e)) sys.exit(2) - except (KeyboardInterrupt): + except KeyboardInterrupt: pass - except verify.VerifyCmpError: + except VerifyCmpError: raise else: from multiprocessing import Process, Queue @@ -216,7 +254,7 @@ def main_bin(): except ImportError: from queue import Empty - fqueue = Queue(len(pyc_paths)+numproc) + fqueue = Queue(len(pyc_paths) + numproc) for f in pyc_paths: fqueue.put(f) for i in range(numproc): @@ -224,15 +262,21 @@ def main_bin(): rqueue = Queue(numproc) + tot_files = okay_files = failed_files = verify_failed_files = 0 + def process_func(): + (tot_files, okay_files, failed_files, verify_failed_files) = ( + 0, + 0, + 0, + 0, + ) try: - (tot_files, okay_files, failed_files, verify_failed_files) = (0, 0, 0, 0) while 1: f = fqueue.get() if f is None: break - (t, o, f, v) = \ - main(src_base, out_base, [f], [], outfile, **options) + (t, o, f, v) = main(src_base, out_base, [f], [], outfile) tot_files += t okay_files += o failed_files += f @@ -249,7 +293,12 @@ def process_func(): for p in procs: p.join() try: - (tot_files, okay_files, failed_files, verify_failed_files) = (0, 0, 0, 0) + (tot_files, okay_files, failed_files, verify_failed_files) = ( + 0, + 0, + 0, + 0, + ) while True: (t, o, f, v) = rqueue.get(False) tot_files += t @@ -258,8 +307,10 @@ def process_func(): verify_failed_files += v except Empty: pass - print('# decompiled %i files: %i okay, %i failed, %i verify failed' % - (tot_files, okay_files, failed_files, verify_failed_files)) + print( + "# decompiled %i files: %i okay, %i 
failed, %i verify failed" + % (tot_files, okay_files, failed_files, verify_failed_files) + ) except (KeyboardInterrupt, OSError): pass @@ -268,5 +319,6 @@ def process_func(): return -if __name__ == '__main__': + +if __name__ == "__main__": main_bin() diff --git a/uncompyle6/disas.py b/uncompyle6/code_fns.py similarity index 72% rename from uncompyle6/disas.py rename to uncompyle6/code_fns.py index cb8cc7e39..1eb6e0905 100644 --- a/uncompyle6/disas.py +++ b/uncompyle6/code_fns.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2016, 2818-2021 by Rocky Bernstein +# Copyright (c) 2015-2016, 2818-2022, 2024 by Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock @@ -17,10 +17,12 @@ # along with this program. If not, see . """ -CPython magic- and version- independent disassembly routines +CPython magic- and version-independent disassembly routines There are two reasons we can't use Python's built-in routines -from dis. First, the bytecode we are extracting may be from a different +from ``dis``. + +First, the bytecode we are extracting may be from a different version of Python (different magic number) than the version of Python that is doing the extraction. @@ -33,18 +35,19 @@ from collections import deque from xdis import check_object_path, iscode, load_module -from xdis.version_info import version_tuple_to_str +from xdis.version_info import PythonImplementation, version_tuple_to_str + from uncompyle6.scanner import get_scanner def disco(version, co, out=None, is_pypy=False): """ - diassembles and deparses a given code block 'co' + disassembles and deparses a given code block ``co``. """ assert iscode(co) - # store final output stream for case of error + # Store final output stream in case there is an error. 
real_out = out or sys.stdout print("# Python %s" % version_tuple_to_str(version), file=real_out) if co.co_filename: @@ -60,11 +63,17 @@ def disco_loop(disasm, queue, real_out): while len(queue) > 0: co = queue.popleft() if co.co_name != "": - print( - "\n# %s line %d of %s" - % (co.co_name, co.co_firstlineno, co.co_filename), - file=real_out, - ) + if hasattr(co, "co_firstlineno"): + print( + "\n# %s line %d of %s" + % (co.co_name, co.co_firstlineno, co.co_filename), + file=real_out, + ) + else: + print( + "\n# %s of %s" % (co.co_name, co.co_filename), + file=real_out, + ) tokens, customize = disasm(co) for t in tokens: if iscode(t.pattr): @@ -92,21 +101,32 @@ def disco_loop(disasm, queue, real_out): def disassemble_file(filename, outstream=None): """ - disassemble Python byte-code file (.pyc) + Disassemble Python byte-code file (.pyc). If given a Python source file (".py") file, we'll try to find the corresponding compiled object. """ filename = check_object_path(filename) - (version, timestamp, magic_int, co, is_pypy, source_size, sip_hash) = load_module( - filename - ) - if type(co) == list: + ( + version, + timestamp, + magic_int, + co, + python_implementation, + source_size, + sip_hash, + _, + ) = load_module(filename) + if isinstance(co, list): for con in co: disco(version, con, outstream) else: - disco(version, co, outstream, is_pypy=is_pypy) - co = None + disco( + version, + co, + outstream, + is_pypy=python_implementation == PythonImplementation.PyPy, + ) def _test(): diff --git a/uncompyle6/linenumbers.py b/uncompyle6/linenumbers.py index 357d0e4d2..4bfbad6c6 100644 --- a/uncompyle6/linenumbers.py +++ b/uncompyle6/linenumbers.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2016, 2818, 2020 by Rocky Bernstein +# Copyright (c) 2015-2016, 2018, 2020 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/uncompyle6/main.py b/uncompyle6/main.py 
index 1cdd26573..f2ac1a6ca 100644 --- a/uncompyle6/main.py +++ b/uncompyle6/main.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018-2022 Rocky Bernstein +# Copyright (C) 2018-2025 Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -13,58 +13,88 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -from typing import Any, Tuple -import datetime, py_compile, os, sys +import ast +import datetime +import os +import os.path as osp +import py_compile +import subprocess +import sys +import tempfile +from typing import Any, Optional, TextIO, Tuple from xdis import iscode -from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE, version_tuple_to_str -from uncompyle6.disas import check_object_path -from uncompyle6.semantics import pysource -from uncompyle6.semantics.pysource import PARSER_DEFAULT_DEBUG +from xdis.load import load_module +from xdis.version_info import ( + IS_PYPY, + PYTHON_VERSION_TRIPLE, + PythonImplementation, + version_tuple_to_str, +) + +from uncompyle6.code_fns import check_object_path from uncompyle6.parser import ParserError +from uncompyle6.semantics.fragments import code_deparse as code_deparse_fragments +from uncompyle6.semantics.linemap import deparse_code_with_map +from uncompyle6.semantics.pysource import ( + PARSER_DEFAULT_DEBUG, + SourceWalkerError, + code_deparse, +) from uncompyle6.version import __version__ # from uncompyle6.linenumbers import line_number_mapping -from uncompyle6.semantics.pysource import code_deparse -from uncompyle6.semantics.fragments import code_deparse as code_deparse_fragments -from uncompyle6.semantics.linemap import deparse_code_with_map - -from xdis.load import load_module def _get_outstream(outfile: str) -> Any: - dir = os.path.dirname(outfile) + """ + Return an opened output file descriptor for ``outfile``. 
+ """ + dir_name = osp.dirname(outfile) failed_file = outfile + "_failed" - if os.path.exists(failed_file): + if osp.exists(failed_file): os.remove(failed_file) try: - os.makedirs(dir) + os.makedirs(dir_name) except OSError: pass return open(outfile, mode="w", encoding="utf-8") +def syntax_check(filename: str) -> bool: + with open(filename) as f: + source = f.read() + valid = True + try: + ast.parse(source) + except SyntaxError: + valid = False + return valid + + def decompile( co, bytecode_version: Tuple[int] = PYTHON_VERSION_TRIPLE, - out=sys.stdout, - showasm=None, + out: Optional[TextIO] = sys.stdout, + showasm: Optional[str] = None, showast={}, timestamp=None, showgrammar=False, source_encoding=None, code_objects={}, source_size=None, - is_pypy=None, + is_pypy: bool = False, magic_int=None, mapstream=None, do_fragments=False, compile_mode="exec", + start_offset: int = 0, + stop_offset: int = -1, ) -> Any: """ ingests and deparses a given code block 'co' - if `bytecode_version` is None, use the current Python intepreter + if `bytecode_version` is None, use the current Python interpreter version. 
Caller is responsible for closing `out` and `mapstream` @@ -85,10 +115,10 @@ def write(s): run_pypy_str = "PyPy " if IS_PYPY else "" sys_version_lines = sys.version.split("\n") if source_encoding: - write("# -*- coding: %s -*-" % source_encoding) + write(f"# -*- coding: {source_encoding} -*-") write( "# uncompyle6 version %s\n" - "# %sPython bytecode version base %s%s\n# Decompiled from: %sPython %s" + "# %sPython bytecode version base %s%s\n# Decompiled from: %sPython %s" % ( __version__, co_pypy_str, @@ -99,42 +129,40 @@ def write(s): ) ) if co.co_filename: - write("# Embedded file name: %s" % co.co_filename) + write(f"# Embedded file name: {co.co_filename}") if timestamp: - write("# Compiled at: %s" % datetime.datetime.fromtimestamp(timestamp)) + write(f"# Compiled at: {datetime.datetime.fromtimestamp(timestamp)}") if source_size: write("# Size of source mod 2**32: %d bytes" % source_size) - # maybe a second -a will do before as well - asm = "after" if showasm else None - grammar = dict(PARSER_DEFAULT_DEBUG) if showgrammar: grammar["reduce"] = True - debug_opts = {"asm": asm, "tree": showast, "grammar": grammar} + debug_opts = {"asm": showasm, "tree": showast, "grammar": grammar} try: if mapstream: if isinstance(mapstream, str): mapstream = _get_outstream(mapstream) + debug_opts = {"asm": showasm, "tree": showast, "grammar": grammar} + deparsed = deparse_code_with_map( - bytecode_version, - co, - out, - showasm, - showast, - showgrammar, + co=co, + out=out, + version=bytecode_version, code_objects=code_objects, is_pypy=is_pypy, + debug_opts=debug_opts, ) header_count = 3 + len(sys_version_lines) - linemap = [ - (line_no, deparsed.source_linemap[line_no] + header_count) - for line_no in sorted(deparsed.source_linemap.keys()) - ] - mapstream.write("\n\n# %s\n" % linemap) + if deparsed is not None: + linemap = [ + (line_no, deparsed.source_linemap[line_no] + header_count) + for line_no in sorted(deparsed.source_linemap.keys()) + ] + mapstream.write(f"\n\n# 
{linemap}\n") else: if do_fragments: deparse_fn = code_deparse_fragments @@ -144,15 +172,18 @@ def write(s): co, out, bytecode_version, - debug_opts=debug_opts, is_pypy=is_pypy, + debug_opts=debug_opts, compile_mode=compile_mode, + start_offset=start_offset, + stop_offset=stop_offset, ) pass + real_out.write("\n") return deparsed - except pysource.SourceWalkerError as e: + except SourceWalkerError as e: # deparsing failed - raise pysource.SourceWalkerError(str(e)) + raise SourceWalkerError(str(e)) def compile_file(source_path: str) -> str: @@ -162,24 +193,26 @@ def compile_file(source_path: str) -> str: basename = source_path if hasattr(sys, "pypy_version_info"): - bytecode_path = "%s-pypy%s.pyc" % (basename, version_tuple_to_str()) + bytecode_path = f"{basename}-pypy{version_tuple_to_str()}.pyc" else: - bytecode_path = "%s-%s.pyc" % (basename, version_tuple_to_str()) + bytecode_path = f"{basename}-{version_tuple_to_str()}.pyc" - print("compiling %s to %s" % (source_path, bytecode_path)) + print(f"compiling {source_path} to {bytecode_path}") py_compile.compile(source_path, bytecode_path, "exec") return bytecode_path def decompile_file( filename: str, - outstream=None, - showasm=None, + outstream: Optional[TextIO] = None, + showasm: Optional[str] = None, showast={}, showgrammar=False, source_encoding=None, mapstream=None, do_fragments=False, + start_offset=0, + stop_offset=-1, ) -> Any: """ decompile Python byte-code file (.pyc). 
Return objects to @@ -188,9 +221,16 @@ def decompile_file( filename = check_object_path(filename) code_objects = {} - (version, timestamp, magic_int, co, is_pypy, source_size, sip_hash) = load_module( - filename, code_objects - ) + ( + version, + timestamp, + magic_int, + co, + python_implementation, + source_size, + _, + _, + ) = load_module(filename, code_objects) if isinstance(co, list): deparsed = [] @@ -206,9 +246,11 @@ def decompile_file( showgrammar, source_encoding, code_objects=code_objects, - is_pypy=is_pypy, + is_pypy=python_implementation == PythonImplementation.PyPy, magic_int=magic_int, mapstream=mapstream, + start_offset=start_offset, + stop_offset=stop_offset, ), ) else: @@ -224,32 +266,34 @@ def decompile_file( source_encoding, code_objects=code_objects, source_size=source_size, - is_pypy=is_pypy, + is_pypy=python_implementation == PythonImplementation.PyPy, magic_int=magic_int, mapstream=mapstream, do_fragments=do_fragments, compile_mode="exec", + start_offset=start_offset, + stop_offset=stop_offset, ) ] - co = None return deparsed # FIXME: combine into an options parameter def main( in_base: str, - out_base: str, + out_base: Optional[str], compiled_files: list, source_files: list, - outfile=None, - showasm=None, + outfile: Optional[str] = None, + showasm: Optional[str] = None, showast={}, - do_verify=False, - showgrammar=False, + do_verify: Optional[str] = None, + showgrammar: bool = False, source_encoding=None, - raise_on_error=False, do_linemaps=False, do_fragments=False, + start_offset: int = 0, + stop_offset: int = -1, ) -> Tuple[int, int, int, int]: """ in_base base directory for input files @@ -262,18 +306,22 @@ def main( - files below out_base out_base=... 
- stdout out_base=None, outfile=None """ - tot_files = okay_files = failed_files = verify_failed_files = 0 + tot_files = okay_files = failed_files = 0 + verify_failed_files = 0 if do_verify else 0 current_outfile = outfile linemap_stream = None for source_path in source_files: compiled_files.append(compile_file(source_path)) + if len(compiled_files) == 0: + return 0, 0, 0, 0 + for filename in compiled_files: - infile = os.path.join(in_base, filename) + infile = osp.join(in_base, filename) # print("XXX", infile) - if not os.path.exists(infile): - sys.stderr.write("File '%s' doesn't exist. Skipped\n" % infile) + if not osp.exists(infile): + sys.stderr.write(f"File '{infile}' doesn't exist. Skipped\n") continue if do_linemaps: @@ -285,14 +333,19 @@ def main( if outfile: # outfile was given as parameter outstream = _get_outstream(outfile) elif out_base is None: - outstream = sys.stdout + out_base = tempfile.mkdtemp(prefix="py-dis-") + if do_verify and filename.endswith(".pyc"): + current_outfile = osp.join(out_base, filename[0:-1]) + outstream = open(current_outfile, "w") + else: + outstream = sys.stdout if do_linemaps: linemap_stream = sys.stdout else: if filename.endswith(".pyc"): - current_outfile = os.path.join(out_base, filename[0:-1]) + current_outfile = osp.join(out_base, filename[0:-1]) else: - current_outfile = os.path.join(out_base, filename) + "_dis" + current_outfile = osp.join(out_base, filename) + "_dis" pass pass @@ -300,9 +353,9 @@ def main( # print(current_outfile, file=sys.stderr) - # Try to uncompile the input file + # Try to decompile the input file. 
try: - deparsed = decompile_file( + deparsed_objects = decompile_file( infile, outstream, showasm, @@ -311,27 +364,79 @@ def main( source_encoding, linemap_stream, do_fragments, + start_offset, + stop_offset, ) if do_fragments: - for d in deparsed: + for deparsed_object in deparsed_objects: last_mod = None - offsets = d.offsets + offsets = deparsed_object.offsets for e in sorted( [k for k in offsets.keys() if isinstance(k[1], int)] ): if e[0] != last_mod: line = "=" * len(e[0]) - outstream.write("%s\n%s\n%s\n" % (line, e[0], line)) + outstream.write(f"{line}\n{e[0]}\n{line}\n") last_mod = e[0] info = offsets[e] - extractInfo = d.extract_node_info(info) - outstream.write("%s" % info.node.format().strip() + "\n") - outstream.write(extractInfo.selectedLine + "\n") - outstream.write(extractInfo.markerLine + "\n\n") + extract_info = deparsed_object.extract_node_info(info) + outstream.write(f"{info.node.format().strip()}" + "\n") + outstream.write(extract_info.selectedLine + "\n") + outstream.write(extract_info.markerLine + "\n\n") pass + + if do_verify: + for deparsed_object in deparsed_objects: + deparsed_object.f.close() + if PYTHON_VERSION_TRIPLE[:2] != deparsed_object.version[:2]: + sys.stdout.write( + f"\n# skipping running {deparsed_object.f.name}; it is " + f"{version_tuple_to_str(deparsed_object.version, end=2)}, " + "and we are " + f"{version_tuple_to_str(PYTHON_VERSION_TRIPLE, end=2)}\n" + ) + else: + check_type = "syntax check" + if do_verify == "run": + check_type = "run" + if PYTHON_VERSION_TRIPLE >= (3, 7): + result = subprocess.run( + [sys.executable, deparsed_object.f.name], + capture_output=True, + ) + valid = result.returncode == 0 + output = result.stdout.decode() + if output: + print(output) + pass + else: + result = subprocess.run( + [sys.executable, deparsed_object.f.name], + ) + valid = result.returncode == 0 + pass + if not valid: + print(result.stderr.decode()) + + else: + valid = syntax_check(deparsed_object.f.name) + + if not valid: + 
verify_failed_files += 1 + sys.stderr.write( + f"\n# {check_type} failed on file {deparsed_object.f.name}\n" + ) + + # sys.stderr.write(f"Ran {deparsed_object.f.name}\n") pass tot_files += 1 - except (ValueError, SyntaxError, ParserError, pysource.SourceWalkerError) as e: + except ( + ValueError, + SyntaxError, + ParserError, + SourceWalkerError, + ImportError, + ) as e: sys.stdout.write("\n") sys.stderr.write(f"\n# file {infile}\n# {e}\n") failed_files += 1 @@ -347,15 +452,21 @@ def main( sys.stdout.write(f"\n{str(e)}\n") if str(e).startswith("Unsupported Python"): sys.stdout.write("\n") - sys.stderr.write( - "\n# Unsupported bytecode in file %s\n# %s\n" % (infile, e) - ) + sys.stderr.write(f"\n# Unsupported bytecode in file {infile}\n# {e}\n") + failed_files += 1 + if current_outfile: + outstream.close() + os.rename(current_outfile, current_outfile + "_failed") + else: + sys.stderr.write("\n# %s" % sys.exc_info()[1]) + sys.stderr.write("\n# Can't uncompile %s\n" % infile) + else: if outfile: outstream.close() os.remove(outfile) sys.stdout.write("\n") - sys.stderr.write("\nLast file: %s " % (infile)) + sys.stderr.write(f"\nLast file: {infile} ") raise # except: @@ -375,7 +486,7 @@ def main( okay_files += 1 if not current_outfile: mess = "\n# okay decompiling" - # mem_usage = __memUsage() + # mem_usage = __mem_usage() print(mess, infile) if current_outfile: sys.stdout.write( @@ -383,7 +494,6 @@ def main( % ( infile, status_msg( - do_verify, tot_files, okay_files, failed_files, @@ -394,37 +504,36 @@ def main( try: # FIXME: Something is weird with Pypy here sys.stdout.flush() - except: + except Exception: pass if current_outfile: sys.stdout.write("\n") try: # FIXME: Something is weird with Pypy here sys.stdout.flush() - except: + except Exception: pass pass - return (tot_files, okay_files, failed_files, verify_failed_files) + return tot_files, okay_files, failed_files, verify_failed_files # ---- main ---- if sys.platform.startswith("linux") and os.uname()[2][:2] in 
["2.", "3.", "4."]: - def __memUsage(): + def __mem_sage(): mi = open("/proc/self/stat", "r") mu = mi.readline().split()[22] mi.close() return int(mu) / 1000000 - else: - def __memUsage(): + def __mem_usage(): return "" -def status_msg(do_verify, tot_files, okay_files, failed_files, verify_failed_files): +def status_msg(tot_files, okay_files, failed_files, verify_failed_files): if tot_files == 1: if failed_files: return "\n# decompile failed" diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index 785b0a455..35d9bb552 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2022 Rocky Bernstein +# Copyright (c) 2015-2024 Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock @@ -21,10 +21,11 @@ import sys -from spark_parser import GenericASTBuilder, DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG -from uncompyle6.show import maybe_show_asm +from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG, GenericASTBuilder from xdis import iscode +from uncompyle6.show import maybe_show_asm + class ParserError(Exception): def __init__(self, token, offset, debug=PARSER_DEFAULT_DEBUG): @@ -44,8 +45,8 @@ def nop_func(self, args): class PythonParser(GenericASTBuilder): - def __init__(self, SyntaxTree, start, debug): - super(PythonParser, self).__init__(SyntaxTree, start, debug) + def __init__(self, syntax_tree_class, start, debug): + super(PythonParser, self).__init__(syntax_tree_class, start, debug) # FIXME: customize per python parser version # These are the non-terminals we should collect into a list. @@ -75,6 +76,8 @@ def __init__(self, SyntaxTree, start, debug): "come_from_loops", # Python 3.7+ "importlist37", + # Python < 1.4 + "args_store", ] self.collect = frozenset(nt_list) @@ -89,7 +92,14 @@ def __init__(self, SyntaxTree, start, debug): # singleton reduction that we can simplify. 
It also happens to be optional # in its other derivation self.optional_nt |= frozenset( - ("come_froms", "suite_stmts", "l_stmts_opt", "c_stmts_opt", "stmts_opt", "stmt") + ( + "come_froms", + "suite_stmts", + "l_stmts_opt", + "c_stmts_opt", + "stmts_opt", + "stmt", + ) ) # Reduce singleton reductions in these nonterminals: @@ -97,10 +107,11 @@ def __init__(self, SyntaxTree, start, debug): # so on but that would require major changes to the # semantic actions self.singleton = frozenset( - ("str", "store", "_stmts", "suite_stmts_opt", "inplace_op") + ("str", "store", "_stmts", "suite_stmts_opt", "inplace_op", "add_value") ) # Instructions filled in from scanner self.insts = [] + self.version = tuple() def ast_first_offset(self, ast): if hasattr(ast, "offset"): @@ -110,10 +121,10 @@ def ast_first_offset(self, ast): def add_unique_rule(self, rule, opname, arg_count, customize): """Add rule to grammar, but only if it hasn't been added previously - opname and stack_count are used in the customize() semantic - the actions to add the semantic action rule. Stack_count is - used in custom opcodes like MAKE_FUNCTION to indicate how - many arguments it has. Often it is not used. + opname and stack_count are used in the customize() semantic + the actions to add the semantic action rule. Stack_count is + used in custom opcodes like MAKE_FUNCTION to indicate how + many arguments it has. Often it is not used. """ if rule not in self.new_rules: # print("XXX ", rule) # debug @@ -149,9 +160,9 @@ def cleanup(self): Remove recursive references to allow garbage collector to collect this object. 
""" - for dict in (self.rule2func, self.rules, self.rule2name): - for i in list(dict.keys()): - dict[i] = None + for rule_dict in (self.rule2func, self.rules, self.rule2name): + for i in list(rule_dict.keys()): + rule_dict[i] = None for i in dir(self): setattr(self, i, None) @@ -162,11 +173,11 @@ def debug_reduce(self, rule, tokens, parent, last_token_pos): def fix(c): s = str(c) - last_token_pos = s.find("_") - if last_token_pos == -1: + token_pos = s.find("_") + if token_pos == -1: return s else: - return s[:last_token_pos] + return s[:token_pos] prefix = "" if parent and tokens: @@ -197,7 +208,7 @@ def error(self, instructions, index): if instructions[finish].linestart: break pass - if start > 0: + if start >= 0: err_token = instructions[index] print("Instruction context:") for i in range(start, finish): @@ -211,10 +222,18 @@ def error(self, instructions, index): raise ParserError(None, -1, self.debug["reduce"]) def get_pos_kw(self, token): - """Return then the number of positional parameters and - represented by the attr field of token""" - # Low byte indicates number of positional paramters, + """ + Return then the number of positional parameters and keyword + parfameters represented by the attr (operand) field of + token. 
+ + This appears in CALL_FUNCTION or CALL_METHOD (PyPy) tokens + """ + # Low byte indicates number of positional parameters, # high byte number of keyword parameters + assert token.kind.startswith("CALL_FUNCTION") or token.kind.startswith( + "CALL_METHOD" + ) args_pos = token.attr & 0xFF args_kw = (token.attr >> 8) & 0xFF return args_pos, args_kw @@ -259,13 +278,13 @@ def __ambiguity(self, children): print(children) return GenericASTBuilder.ambiguity(self, children) - def resolve(self, list): - if len(list) == 2 and "function_def" in list and "assign" in list: + def resolve(self, rule: list): + if len(rule) == 2 and "function_def" in rule and "assign" in rule: return "function_def" - if "grammar" in list and "expr" in list: + if "grammar" in rule and "expr" in rule: return "expr" - # print >> sys.stderr, 'resolve', str(list) - return GenericASTBuilder.resolve(self, list) + # print >> sys.stderr, 'resolve', str(rule) + return GenericASTBuilder.resolve(self, rule) ############################################### # Common Python 2 and Python 3 grammar rules # @@ -295,6 +314,9 @@ def p_stmt(self, args): c_stmts ::= lastc_stmt c_stmts ::= continues + ending_return ::= RETURN_VALUE RETURN_LAST + ending_return ::= RETURN_VALUE_LAMBDA LAMBDA_MARKER + lastc_stmt ::= iflaststmt lastc_stmt ::= forelselaststmt lastc_stmt ::= ifelsestmtc @@ -352,7 +374,7 @@ def p_stmt(self, args): stmt ::= tryelsestmt stmt ::= tryfinallystmt stmt ::= with - stmt ::= withasstmt + stmt ::= with_as stmt ::= delete delete ::= DELETE_FAST @@ -369,6 +391,10 @@ def p_stmt(self, args): returns ::= return returns ::= _stmts return + + # NOP + stmt ::= nop_stmt + nop_stmt ::= NOP """ pass @@ -588,11 +614,12 @@ def p_expr(self, args): compare ::= compare_single compare_single ::= expr expr COMPARE_OP - # A compare_chained is two comparisions like x <= y <= z - compare_chained ::= expr compare_chained1 ROT_TWO POP_TOP _come_froms - compare_chained2 ::= expr COMPARE_OP JUMP_FORWARD + # A compare_chained is 
two comparisons, as in: x <= y <= z + compare_chained ::= expr compared_chained_middle ROT_TWO POP_TOP + _come_froms + compare_chained_right ::= expr COMPARE_OP JUMP_FORWARD - # Non-null kvlist items are broken out in the indiviual grammars + # Non-null kvlist items are broken out in the individual grammars kvlist ::= # Positional arguments in make_function @@ -646,6 +673,8 @@ def get_python_parser( version = version[:2] + p = None + # FIXME: there has to be a better way... # We could do this as a table lookup, but that would force us # in import all of the parsers all of the time. Perhaps there is @@ -659,7 +688,7 @@ def get_python_parser( if compile_mode == "exec": p = parse10.Python10Parser(debug_parser) else: - p = parse10.Python01ParserSingle(debug_parser) + p = parse10.Python10ParserSingle(debug_parser) elif version == (1, 1): import uncompyle6.parsers.parse11 as parse11 @@ -865,6 +894,7 @@ def python_parser( :param showasm: Flag which determines whether the disassembled and ingested code is written to sys.stdout or not. :param parser_debug: dict containing debug flags for the spark parser. + :param is_pypy: True if we are running PyPY :return: Abstract syntax tree representation of the code object. 
""" @@ -893,9 +923,9 @@ def python_parser( if __name__ == "__main__": def parse_test(co): - from uncompyle6 import PYTHON_VERSION, IS_PYPY + from xdis import IS_PYPY, PYTHON_VERSION_TRIPLE - ast = python_parser(PYTHON_VERSION, co, showasm=True, is_pypy=IS_PYPY) + ast = python_parser(PYTHON_VERSION_TRIPLE, co, showasm=True, is_pypy=IS_PYPY) print(ast) return diff --git a/uncompyle6/parsers/parse13.py b/uncompyle6/parsers/parse13.py index 593805ee9..307af36bc 100644 --- a/uncompyle6/parsers/parse13.py +++ b/uncompyle6/parsers/parse13.py @@ -1,11 +1,12 @@ -# Copyright (c) 2018 Rocky Bernstein +# Copyright (c) 2018, 2023 Rocky Bernstein from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG + from uncompyle6.parser import PythonParserSingle from uncompyle6.parsers.parse14 import Python14Parser -class Python13Parser(Python14Parser): +class Python13Parser(Python14Parser): def p_misc13(self, args): """ # Nothing here yet, but will need to add LOAD_GLOBALS @@ -24,7 +25,6 @@ def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG): # """) # self.check_reduce['doc_junk'] = 'tokens' - # def reduce_is_invalid(self, rule, ast, tokens, first, last): # invalid = super(Python14Parser, # self).reduce_is_invalid(rule, ast, @@ -35,11 +35,11 @@ def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG): # return not isinstance(tokens[first].pattr, str) - class Python13ParserSingle(Python13Parser, PythonParserSingle): pass -if __name__ == '__main__': + +if __name__ == "__main__": # Check grammar p = Python13Parser() p.check_grammar() diff --git a/uncompyle6/parsers/parse14.py b/uncompyle6/parsers/parse14.py index 250f37f47..2064e6095 100644 --- a/uncompyle6/parsers/parse14.py +++ b/uncompyle6/parsers/parse14.py @@ -1,18 +1,33 @@ -# Copyright (c) 2018 Rocky Bernstein +# Copyright (c) 2018, 2022 Rocky Bernstein from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG -from uncompyle6.parser import PythonParserSingle +from uncompyle6.parser import PythonParserSingle, nop_func from 
uncompyle6.parsers.parse15 import Python15Parser class Python14Parser(Python15Parser): def p_misc14(self, args): """ - # Not much here yet, but will probably need to add UNARY_CALL, BINARY_CALL, - # RAISE_EXCEPTION, BUILD_FUNCTION, UNPACK_ARG, UNPACK_VARARG, LOAD_LOCAL, - # SET_FUNC_ARGS, and RESERVE_FAST + # Not much here yet, but will probably need to add UNARY_CALL, + # LOAD_LOCAL, SET_FUNC_ARGS + + args ::= RESERVE_FAST UNPACK_ARG args_store + args_store ::= STORE_FAST* + call ::= expr tuple BINARY_CALL + expr ::= call + kv ::= DUP_TOP expr ROT_TWO LOAD_CONST STORE_SUBSCR + mkfunc ::= LOAD_CODE BUILD_FUNCTION + print_expr_stmt ::= expr PRINT_EXPR + raise_stmt2 ::= expr expr RAISE_EXCEPTION + star_args ::= RESERVE_FAST UNPACK_VARARG_1 args_store + stmt ::= args + stmt ::= print_expr_stmt + stmt ::= star_args + stmt ::= varargs + varargs ::= RESERVE_FAST UNPACK_VARARG_0 args_store # Not strictly needed, but tidies up output + stmt ::= doc_junk doc_junk ::= LOAD_CONST POP_TOP @@ -42,7 +57,14 @@ def customize_grammar_rules(self, tokens, customize): jb_pop POP_BLOCK else_suitel COME_FROM """) - self.check_reduce['doc_junk'] = 'tokens' + self.check_reduce["doc_junk"] = "tokens" + for i, token in enumerate(tokens): + opname = token.kind + opname_base = opname[:opname.rfind("_")] + + if opname_base == "UNPACK_VARARG": + if token.attr > 1: + self.addRule(f"star_args ::= RESERVE_FAST {opname} args_store", nop_func) def reduce_is_invalid(self, rule, ast, tokens, first, last): diff --git a/uncompyle6/parsers/parse2.py b/uncompyle6/parsers/parse2.py index fc676883a..6b871968f 100644 --- a/uncompyle6/parsers/parse2.py +++ b/uncompyle6/parsers/parse2.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2021 Rocky Bernstein +# Copyright (c) 2015-2021, 2024 Rocky Bernstein # Copyright (c) 2000-2002 by hartmut Goebel # # Copyright (c) 1999 John Aycock @@ -27,10 +27,11 @@ from __future__ import print_function -from uncompyle6.parsers.reducecheck import (except_handler_else, ifelsestmt, 
tryelsestmt) +from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG + from uncompyle6.parser import PythonParser, PythonParserSingle, nop_func +from uncompyle6.parsers.reducecheck import except_handler_else, ifelsestmt, tryelsestmt from uncompyle6.parsers.treenode import SyntaxTree -from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG class Python2Parser(PythonParser): @@ -312,6 +313,19 @@ def customize_grammar_rules(self, tokens, customize): opname_base = opname[: opname.rfind("_")] + if opname in ("BUILD_CONST_LIST", "BUILD_CONST_SET"): + rule = ( + """ + add_consts ::= add_value+ + add_value ::= ADD_VALUE + add_value ::= ADD_VALUE_VAR + const_list ::= COLLECTION_START add_consts %s + expr ::= const_list + """ + % opname + ) + self.addRule(rule, nop_func) + # The order of opname listed is roughly sorted below if opname_base in ("BUILD_LIST", "BUILD_SET", "BUILD_TUPLE"): # We do this complicated test to speed up parsing of @@ -392,7 +406,6 @@ def customize_grammar_rules(self, tokens, customize): "CALL_FUNCTION_VAR_KW", "CALL_FUNCTION_KW", ): - args_pos, args_kw = self.get_pos_kw(token) # number of apply equiv arguments: @@ -513,7 +526,7 @@ def customize_grammar_rules(self, tokens, customize): custom_seen_ops.add(opname) continue elif opname == "LOAD_LISTCOMP": - self.addRule("expr ::= listcomp", nop_func) + self.addRule("expr ::= list_comp", nop_func) custom_seen_ops.add(opname) continue elif opname == "LOAD_SETCOMP": @@ -642,22 +655,23 @@ def customize_grammar_rules(self, tokens, customize): pass self.reduce_check_table = { - # "and": and_check, + # "and": and_invalid, "except_handler_else": except_handler_else, "ifelsestmt": ifelsestmt, - # "or": or_check, + # "or": or_invalid, "tryelsestmt": tryelsestmt, "tryelsestmtl": tryelsestmt, } self.check_reduce["and"] = "AST" + self.check_reduce["assert_expr_and"] = "AST" + self.check_reduce["aug_assign2"] = "AST" self.check_reduce["except_handler_else"] = "tokens" + self.check_reduce["ifelsestmt"] = 
"AST" + self.check_reduce["ifstmt"] = "tokens" + self.check_reduce["or"] = "AST" self.check_reduce["raise_stmt1"] = "tokens" - self.check_reduce["assert_expr_and"] = "AST" self.check_reduce["tryelsestmt"] = "AST" self.check_reduce["tryelsestmtl"] = "AST" - self.check_reduce["aug_assign2"] = "AST" - self.check_reduce["or"] = "AST" - self.check_reduce["ifstmt"] = "tokens" # self.check_reduce['_stmts'] = 'AST' # Dead code testing... @@ -688,7 +702,7 @@ def reduce_is_invalid(self, rule, ast, tokens, first, last): # an optimization where the "and" jump_false is back to a loop. jmp_false = ast[1] if jmp_false[0] == "POP_JUMP_IF_FALSE": - while (first < last and isinstance(tokens[last].offset, str)): + while first < last and isinstance(tokens[last].offset, str): last -= 1 if jmp_false[0].attr < tokens[last].offset: return True @@ -697,8 +711,10 @@ def reduce_is_invalid(self, rule, ast, tokens, first, last): # or that it jumps to the same place as the end of "and" jmp_false = ast[1][0] jmp_target = jmp_false.offset + jmp_false.attr + 3 - return not (jmp_target == tokens[last].offset or - tokens[last].pattr == jmp_false.pattr) + return not ( + jmp_target == tokens[last].offset + or tokens[last].pattr == jmp_false.pattr + ) # Dead code testing... 
# if lhs == 'while1elsestmt': # from trepan.api import debug; debug() diff --git a/uncompyle6/parsers/parse22.py b/uncompyle6/parsers/parse22.py index 108a3c311..8c4f34ae5 100644 --- a/uncompyle6/parsers/parse22.py +++ b/uncompyle6/parsers/parse22.py @@ -29,6 +29,10 @@ def customize_grammar_rules(self, tokens, customize): self.remove_rules(""" kvlist ::= kvlist kv2 """) + if self.version[:2] <= (2, 2): + # TODO: We may add something different or customize something + del self.reduce_check_table["ifstmt"] + class Python22ParserSingle(Python23Parser, PythonParserSingle): pass diff --git a/uncompyle6/parsers/parse24.py b/uncompyle6/parsers/parse24.py index 11bf4dcc7..575b1b436 100644 --- a/uncompyle6/parsers/parse24.py +++ b/uncompyle6/parsers/parse24.py @@ -1,21 +1,23 @@ -# Copyright (c) 2016-2018, 2020 Rocky Bernstein +# Copyright (c) 2016-2018, 2020, 2022-2024 Rocky Bernstein """ spark grammar differences over Python2.5 for Python 2.4. """ -from uncompyle6.parser import PythonParserSingle from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG + +from uncompyle6.parser import PythonParserSingle from uncompyle6.parsers.parse25 import Python25Parser + class Python24Parser(Python25Parser): def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG): super(Python24Parser, self).__init__(debug_parser) self.customized = {} def p_misc24(self, args): - ''' + """ # Python 2.4 only adds something like the below for if 1: - # However we will just treat it as a noop (which of course messes up + # However we will just treat it as a noop which messes up # simple verify of bytecode. 
# See also below in reduce_is_invalid where we check that the JUMP_FORWARD # target matches the COME_FROM target @@ -69,62 +71,62 @@ def p_misc24(self, args): # Python 2.3- use kv kvlist ::= kvlist kv2 kv2 ::= DUP_TOP expr expr ROT_THREE STORE_SUBSCR - ''' + """ def remove_rules_24(self): - self.remove_rules(""" + self.remove_rules( + """ expr ::= if_exp - """) - + """ + ) def customize_grammar_rules(self, tokens, customize): - self.remove_rules(""" + self.remove_rules( + """ gen_comp_body ::= expr YIELD_VALUE POP_TOP kvlist ::= kvlist kv3 while1stmt ::= SETUP_LOOP l_stmts JUMP_BACK COME_FROM while1stmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK COME_FROM while1stmt ::= SETUP_LOOP returns COME_FROM whilestmt ::= SETUP_LOOP testexpr returns POP_BLOCK COME_FROM + with ::= expr setupwith SETUP_FINALLY suite_stmts_opt POP_BLOCK + LOAD_CONST COME_FROM with_cleanup + with_as ::= expr setupwithas store suite_stmts_opt POP_BLOCK + LOAD_CONST COME_FROM with_cleanup with_cleanup ::= LOAD_FAST DELETE_FAST WITH_CLEANUP END_FINALLY with_cleanup ::= LOAD_NAME DELETE_NAME WITH_CLEANUP END_FINALLY - withasstmt ::= expr setupwithas store suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM with_cleanup - with ::= expr setupwith SETUP_FINALLY suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM with_cleanup - stmt ::= with - stmt ::= withasstmt - """) + stmt ::= with + stmt ::= with_as + """ + ) super(Python24Parser, self).customize_grammar_rules(tokens, customize) self.remove_rules_24() if self.version[:2] == (2, 4): - self.check_reduce['nop_stmt'] = 'tokens' + self.check_reduce["nop_stmt"] = "tokens" + + if self.version[:2] <= (2, 4): + # TODO: We may add something different or customize something + del self.reduce_check_table["ifelsestmt"] def reduce_is_invalid(self, rule, ast, tokens, first, last): - invalid = super(Python24Parser, - self).reduce_is_invalid(rule, ast, - tokens, first, last) + invalid = super(Python24Parser, self).reduce_is_invalid( + rule, ast, tokens, first, last + ) if invalid 
or tokens is None: return invalid lhs = rule[0] - if lhs == 'nop_stmt': - l = len(tokens) - if 0 <= l < len(tokens): + if lhs == "nop_stmt": + token_len = len(tokens) + if 0 <= token_len < len(tokens): return not int(tokens[first].pattr) == tokens[last].offset - elif lhs == 'try_except': - if last == len(tokens): - last -= 1 - if tokens[last] != 'COME_FROM' and tokens[last-1] == 'COME_FROM': - last -= 1 - return (tokens[last] == 'COME_FROM' - and tokens[last-1] == 'END_FINALLY' - and tokens[last-2] == 'POP_TOP' - and tokens[last-3].kind != 'JUMP_FORWARD') - - return False + class Python24ParserSingle(Python24Parser, PythonParserSingle): pass -if __name__ == '__main__': + +if __name__ == "__main__": # Check grammar p = Python24Parser() p.check_grammar() diff --git a/uncompyle6/parsers/parse25.py b/uncompyle6/parsers/parse25.py index 927f0ab6f..06bea5b80 100644 --- a/uncompyle6/parsers/parse25.py +++ b/uncompyle6/parsers/parse25.py @@ -1,12 +1,14 @@ -# Copyright (c) 2016-2018, 2020 Rocky Bernstein +# Copyright (c) 2016-2018, 2020, 2022, 2024 Rocky Bernstein """ spark grammar differences over Python2.6 for Python 2.5. """ -from uncompyle6.parser import PythonParserSingle from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG + +from uncompyle6.parser import PythonParserSingle from uncompyle6.parsers.parse26 import Python26Parser -from uncompyle6.parsers.reducecheck import (ifelsestmt) +from uncompyle6.parsers.reducecheck import ifelsestmt + class Python25Parser(Python26Parser): def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG): @@ -31,9 +33,11 @@ def p_misc25(self, args): POP_BLOCK LOAD_CONST COME_FROM with_cleanup # Semantic actions want store to be at index 2 - withasstmt ::= expr setupwithas store suite_stmts_opt + with_as ::= expr setupwithas store suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM with_cleanup + # The last except of a "try: ... except" can do this... 
+ except_suite ::= c_stmts_opt COME_FROM JUMP_ABSOLUTE POP_TOP store ::= STORE_NAME store ::= STORE_FAST @@ -46,7 +50,7 @@ def p_misc25(self, args): # Python 2.6 omits the LOAD_FAST DELETE_FAST below # withas is allowed as a "from future" in 2.5 - withasstmt ::= expr setupwithas store suite_stmts_opt + with_as ::= expr setupwithas store suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM with_cleanup @@ -56,15 +60,23 @@ def p_misc25(self, args): kvlist ::= kvlist kv kv ::= DUP_TOP expr ROT_TWO expr STORE_SUBSCR + + _ifstmts_jump ::= c_stmts_opt COME_FROM JUMP_ABSOLUTE COME_FROM POP_TOP + + + # "and_then" is a hack around the fact we have THEN detection. + and_then ::= expr JUMP_IF_FALSE THEN POP_TOP expr JUMP_IF_FALSE THEN POP_TOP + testexpr_then ::= and_then """ def customize_grammar_rules(self, tokens, customize): # Remove grammar rules inherited from Python 2.6 or Python 2 - self.remove_rules(""" + self.remove_rules( + """ setupwith ::= DUP_TOP LOAD_ATTR ROT_TWO LOAD_ATTR CALL_FUNCTION_0 POP_TOP with ::= expr setupwith SETUP_FINALLY suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM WITH_CLEANUP END_FINALLY - withasstmt ::= expr setupwithas store suite_stmts_opt + with_as ::= expr setupwithas store suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM WITH_CLEANUP END_FINALLY assert2 ::= assert_expr jmp_true LOAD_ASSERT expr CALL_FUNCTION_1 RAISE_VARARGS_1 classdefdeco ::= classdefdeco1 store @@ -87,7 +99,8 @@ def customize_grammar_rules(self, tokens, customize): return_stmt_lambda LAMBDA_MARKER if_exp_not_lambda ::= expr jmp_true_then expr return_if_lambda return_stmt_lambda LAMBDA_MARKER - """) + """ + ) super(Python25Parser, self).customize_grammar_rules(tokens, customize) if self.version[:2] == (2, 5): self.check_reduce["try_except"] = "AST" @@ -95,9 +108,9 @@ def customize_grammar_rules(self, tokens, customize): self.check_reduce["ifelsestmt"] = "AST" def reduce_is_invalid(self, rule, ast, tokens, first, last): - invalid = super(Python25Parser, - 
self).reduce_is_invalid(rule, ast, - tokens, first, last) + invalid = super(Python25Parser, self).reduce_is_invalid( + rule, ast, tokens, first, last + ) if invalid or tokens is None: return invalid if rule == ("aug_assign1", ("expr", "expr", "inplace_op", "store")): @@ -112,6 +125,7 @@ def reduce_is_invalid(self, rule, ast, tokens, first, last): class Python25ParserSingle(Python26Parser, PythonParserSingle): pass + if __name__ == "__main__": # Check grammar p = Python25Parser() diff --git a/uncompyle6/parsers/parse26.py b/uncompyle6/parsers/parse26.py index 2d95a5d89..0eb5f46c8 100644 --- a/uncompyle6/parsers/parse26.py +++ b/uncompyle6/parsers/parse26.py @@ -1,15 +1,22 @@ -# Copyright (c) 2017-2022 Rocky Bernstein +# Copyright (c) 2017-2024 Rocky Bernstein """ spark grammar differences over Python2 for Python 2.6. """ -from uncompyle6.parser import PythonParserSingle from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG + +from uncompyle6.parser import PythonParserSingle from uncompyle6.parsers.parse2 import Python2Parser -from uncompyle6.parsers.reducecheck import (except_handler, tryexcept, tryelsestmt) +from uncompyle6.parsers.reducecheck import ( + except_handler, + ifelsestmt2, + ifstmt2, + tryelsestmt, + tryexcept, +) -class Python26Parser(Python2Parser): +class Python26Parser(Python2Parser): def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG): super(Python26Parser, self).__init__(debug_parser) self.customized = {} @@ -58,8 +65,8 @@ def p_try_except26(self, args): except_suite ::= c_stmts_opt jmp_abs come_from_pop # This is what happens after a jump where - # we start a new block. For reasons I don't fully - # understand, there is also a value on the top of the stack + # we start a new block. For reasons that I don't fully + # understand, there is also a value on the top of the stack. 
come_from_pop ::= COME_FROM POP_TOP come_froms_pop ::= come_froms POP_TOP """ @@ -72,7 +79,7 @@ def p_try_except26(self, args): def p_jumps26(self, args): """ - # The are the equivalents of Python 2.7+'s + # There are the equivalents of Python 2.7+'s # POP_JUMP_IF_TRUE and POP_JUMP_IF_FALSE jmp_true ::= JUMP_IF_TRUE POP_TOP jmp_false ::= JUMP_IF_FALSE POP_TOP @@ -100,8 +107,8 @@ def p_jumps26(self, args): _ifstmts_jump ::= c_stmts_opt JUMP_FORWARD come_froms POP_TOP COME_FROM # This is what happens after a jump where - # we start a new block. For reasons I don't fully - # understand, there is also a value on the top of the stack + # we start a new block. For reasons that I don't fully + # understand, there is also a value on the top of the stack. come_froms_pop ::= come_froms POP_TOP """ @@ -123,13 +130,20 @@ def p_stmt26(self, args): # Semantic actions want else_suitel to be at index 3 ifelsestmtl ::= testexpr c_stmts_opt cf_jb_cf_pop else_suitel ifelsestmtc ::= testexpr c_stmts_opt ja_cf_pop else_suitec + ifelsestmt ::= testexpr stmts_opt ja_cf_pop else_suite + + stmts_opt ::= stmts + stmts_opt ::= + + # The last except of a "try: ... except" can do this... + except_suite ::= stmts_opt COME_FROM JUMP_ABSOLUTE POP_TOP # Semantic actions want suite_stmts_opt to be at index 3 with ::= expr setupwith SETUP_FINALLY suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM WITH_CLEANUP END_FINALLY # Semantic actions want store to be at index 2 - withasstmt ::= expr setupwithas store suite_stmts_opt + with_as ::= expr setupwithas store suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM WITH_CLEANUP END_FINALLY # This is truly weird. 2.7 does this (not including POP_TOP) with @@ -207,7 +221,7 @@ def p_stmt26(self, args): """ def p_comp26(self, args): - ''' + """ list_for ::= expr for_iter store list_iter JUMP_BACK come_froms POP_TOP # The JUMP FORWARD below jumps to the JUMP BACK. 
It seems to happen @@ -232,9 +246,9 @@ def p_comp26(self, args): list_comp ::= BUILD_LIST_0 DUP_TOP store list_iter delete list_comp ::= BUILD_LIST_0 DUP_TOP - store list_iter JUMP_BACK delete + store list_iter JUMP_BACK delete lc_body ::= LOAD_NAME expr LIST_APPEND - lc_body ::= LOAD_FAST expr LIST_APPEND + lc_body ::= LOAD_FAST expr LIST_APPEND comp_for ::= SETUP_LOOP expr for_iter store comp_iter jb_pb_come_from @@ -265,10 +279,10 @@ def p_comp26(self, args): generator_exp ::= LOAD_GENEXPR MAKE_FUNCTION_0 expr GET_ITER CALL_FUNCTION_1 COME_FROM list_if ::= expr jmp_false_then list_iter - ''' + """ def p_ret26(self, args): - ''' + """ ret_and ::= expr jmp_false return_expr_or_cond COME_FROM ret_or ::= expr jmp_true return_expr_or_cond COME_FROM if_exp_ret ::= expr jmp_false_then expr RETURN_END_IF POP_TOP return_expr_or_cond @@ -279,7 +293,7 @@ def p_ret26(self, args): # FIXME: split into Python 2.5 ret_or ::= expr jmp_true return_expr_or_cond come_froms - ''' + """ def p_except26(self, args): """ @@ -300,26 +314,22 @@ def p_misc26(self, args): and ::= expr JUMP_IF_FALSE POP_TOP expr JUMP_IF_FALSE POP_TOP - # compare_chained is like x <= y <= z - compare_chained ::= expr compare_chained1 ROT_TWO COME_FROM POP_TOP _come_froms - compare_chained1 ::= expr DUP_TOP ROT_THREE COMPARE_OP - jmp_false compare_chained1 _come_froms - compare_chained1 ::= expr DUP_TOP ROT_THREE COMPARE_OP - jmp_false compare_chained2 _come_froms + # A "compare_chained" is two comparisons like x <= y <= z + compare_chained ::= expr compared_chained_middle ROT_TWO + COME_FROM POP_TOP _come_froms + compared_chained_middle ::= expr DUP_TOP ROT_THREE COMPARE_OP + jmp_false compared_chained_middle _come_froms + compared_chained_middle ::= expr DUP_TOP ROT_THREE COMPARE_OP + jmp_false compare_chained_right _come_froms - compare_chained1 ::= expr DUP_TOP ROT_THREE COMPARE_OP - jmp_false_then compare_chained1 _come_froms - compare_chained1 ::= expr DUP_TOP ROT_THREE COMPARE_OP - jmp_false_then 
compare_chained2 _come_froms + compared_chained_middle ::= expr DUP_TOP ROT_THREE COMPARE_OP + jmp_false_then compared_chained_middle _come_froms + compared_chained_middle ::= expr DUP_TOP ROT_THREE COMPARE_OP + jmp_false_then compare_chained_right _come_froms - return_lambda ::= RETURN_VALUE - return_lambda ::= RETURN_END_IF - return_lambda ::= RETURN_END_IF_LAMBDA - return_lambda ::= RETURN_VALUE_LAMBDA - - compare_chained2 ::= expr COMPARE_OP return_expr_lambda - compare_chained2 ::= expr COMPARE_OP RETURN_END_IF_LAMBDA - compare_chained2 ::= expr COMPARE_OP RETURN_END_IF COME_FROM + compare_chained_right ::= expr COMPARE_OP return_expr_lambda + compare_chained_right ::= expr COMPARE_OP RETURN_END_IF_LAMBDA + compare_chained_right ::= expr COMPARE_OP RETURN_END_IF COME_FROM return_if_lambda ::= RETURN_END_IF_LAMBDA POP_TOP stmt ::= if_exp_lambda @@ -347,59 +357,61 @@ def p_misc26(self, args): """ def customize_grammar_rules(self, tokens, customize): - self.remove_rules(""" - withasstmt ::= expr SETUP_WITH store suite_stmts_opt - POP_BLOCK LOAD_CONST COME_FROM_WITH - WITH_CLEANUP END_FINALLY - """) + self.remove_rules( + """ + with_as ::= expr SETUP_WITH store suite_stmts_opt + POP_BLOCK LOAD_CONST COME_FROM_WITH + WITH_CLEANUP END_FINALLY + """ + ) super(Python26Parser, self).customize_grammar_rules(tokens, customize) self.reduce_check_table = { "except_handler": except_handler, + "ifstmt": ifstmt2, + "ifelsestmt": ifelsestmt2, "tryelsestmt": tryelsestmt, "try_except": tryexcept, "tryelsestmtl": tryelsestmt, } - - self.check_reduce['and'] = 'AST' - self.check_reduce['assert_expr_and'] = 'AST' + self.check_reduce["and"] = "AST" + self.check_reduce["assert_expr_and"] = "AST" self.check_reduce["except_handler"] = "tokens" - self.check_reduce["ifstmt"] = "tokens" + self.check_reduce["ifstmt"] = "AST" self.check_reduce["ifelsestmt"] = "AST" self.check_reduce["forelselaststmtl"] = "tokens" self.check_reduce["forelsestmt"] = "tokens" - self.check_reduce['list_for'] = 
'AST' - self.check_reduce['try_except'] = 'AST' - self.check_reduce['tryelsestmt'] = 'AST' - self.check_reduce['tryelsestmtl'] = 'AST' + self.check_reduce["list_for"] = "AST" + self.check_reduce["try_except"] = "AST" + self.check_reduce["tryelsestmt"] = "AST" + self.check_reduce["tryelsestmtl"] = "AST" def reduce_is_invalid(self, rule, ast, tokens, first, last): - invalid = super(Python26Parser, - self).reduce_is_invalid(rule, ast, - tokens, first, last) + invalid = super(Python26Parser, self).reduce_is_invalid( + rule, ast, tokens, first, last + ) lhs = rule[0] if invalid or tokens is None: return invalid if rule in ( - ('and', ('expr', 'jmp_false', 'expr', '\\e_come_from_opt')), - ('and', ('expr', 'jmp_false', 'expr', 'come_from_opt')), - ('assert_expr_and', ('assert_expr', 'jmp_false', 'expr')) - ): - + ("and", ("expr", "jmp_false", "expr", "\\e_come_from_opt")), + ("and", ("expr", "jmp_false", "expr", "come_from_opt")), + ("assert_expr_and", ("assert_expr", "jmp_false", "expr")), + ): # FIXME: workaround profiling bug if ast[1] is None: return False - # For now, we won't let the 2nd 'expr' be a "if_exp_not" + # For now, we won't let the 2nd 'expr' be an "if_exp_not" # However in < 2.6 where we don't have if/else expression it *can* # be. 
if self.version >= (2, 6) and ast[2][0] == "if_exp_not": return True test_index = last - while tokens[test_index].kind == 'COME_FROM': + while tokens[test_index].kind == "COME_FROM": test_index += 1 - if tokens[test_index].kind.startswith('JUMP_IF'): + if tokens[test_index].kind.startswith("JUMP_IF"): return False # Test that jmp_false jumps to the end of "and" @@ -407,8 +419,10 @@ def reduce_is_invalid(self, rule, ast, tokens, first, last): jmp_false = ast[1][0] jmp_target = jmp_false.offset + jmp_false.attr + 3 - return not (jmp_target == tokens[test_index].offset or - tokens[last].pattr == jmp_false.pattr) + return not ( + jmp_target == tokens[test_index].offset + or tokens[last].pattr == jmp_false.pattr + ) elif lhs in ("forelselaststmtl", "forelsestmt"): # print("XXX", first, last) @@ -428,115 +442,94 @@ def reduce_is_invalid(self, rule, ast, tokens, first, last): # since the operand can be a relative offset rather than # an absolute offset. setup_inst = self.insts[self.offset2inst_index[tokens[first].offset]] + last = min(len(tokens) - 1, last) if self.version <= (2, 2) and tokens[last] == "COME_FROM": last += 1 - return tokens[last-1].off2int() > setup_inst.argval + return tokens[last - 1].off2int() > setup_inst.argval elif rule == ("ifstmt", ("testexpr", "_ifstmts_jump")): - for i in range(last-1, last-4, -1): + for i in range(last - 1, last - 4, -1): t = tokens[i] if t == "JUMP_FORWARD": - return t.attr > tokens[min(last, len(tokens)-1)].off2int() + return t.attr > tokens[min(last, len(tokens) - 1)].off2int() elif t not in ("POP_TOP", "COME_FROM"): break pass pass elif rule == ( - 'list_for', - ('expr', 'for_iter', 'store', 'list_iter', - 'JUMP_ABSOLUTE', 'come_froms', 'POP_TOP', 'jb_pop')): + "list_for", + ( + "expr", + "for_iter", + "store", + "list_iter", + "JUMP_ABSOLUTE", + "come_froms", + "POP_TOP", + "jb_pop", + ), + ): # The JUMP_ABSOLUTE has to be to the last POP_TOP or this is invalid ja_attr = ast[4].attr return tokens[last].offset != ja_attr - 
elif lhs == 'try_except': - # We need to distingush try_except from tryelsestmt and we do that - # by checking the jump before the END_FINALLY + elif lhs == "try_except": + # We need to distinguish "try_except" from "tryelsestmt"; we do that + # by looking for a jump before the END_FINALLY to the "else" clause of + # "try else". + # # If we have: - # insn + # # POP_TOP # END_FINALLY # COME_FROM - # then insn has to be either a JUMP_FORWARD or a RETURN_VALUE - # and if it is JUMP_FORWARD, then it has to be a JUMP_FORWARD to right after + # then has to be either a a jump of some sort (JUMP_FORWARD, BREAK_LOOP, JUMP_BACK, or RETURN_VALUE). + # Furthermore, if it is JUMP_FORWARD, then it has to be a JUMP_FORWARD to right after # COME_FROM if last == len(tokens): last -= 1 - if tokens[last] != 'COME_FROM' and tokens[last-1] == 'COME_FROM': + if tokens[last] != "COME_FROM" and tokens[last - 1] == "COME_FROM": last -= 1 - if (tokens[last] == 'COME_FROM' - and tokens[last-1] == 'END_FINALLY' - and tokens[last-2] == 'POP_TOP'): + if ( + tokens[last] == "COME_FROM" + and tokens[last - 1] == "END_FINALLY" + and tokens[last - 2] == "POP_TOP" + ): # A jump of 2 is a jump around POP_TOP, END_FINALLY which # would indicate try/else rather than try - return (tokens[last-3].kind not in frozenset(('JUMP_FORWARD', 'RETURN_VALUE')) - or (tokens[last-3] == 'JUMP_FORWARD' and tokens[last-3].attr != 2)) - elif lhs == 'tryelsestmt': - - # We need to distingush try_except from tryelsestmt and we do that - # by making sure that the jump before the except handler jumps to - # code somewhere before the end of the construct. - # This AST method is slower, but the token-only based approach - # didn't work as it failed with a "try" embedded inside a "try/else" - # since we can't detect COME_FROM boundaries. 
- - if ast[3] == 'except_handler': - except_handler = ast[3] - if except_handler[0] == 'JUMP_FORWARD': - else_start = int(except_handler[0].pattr) - if last == len(tokens): - last -= 1 - if tokens[last] == 'COME_FROM' and isinstance: - last_offset = int(tokens[last].offset.split('_')[0]) - return else_start >= last_offset - - - # The above test apparently isn't good enough, so we have additional - # checks distinguish try_except from tryelsestmt and we do that - # by checking the jump before the END_FINALLY - # If we have: - # insn - # POP_TOP - # END_FINALLY - # COME_FROM - # then insn is neither a JUMP_FORWARD nor RETURN_VALUE, - # or if it is JUMP_FORWARD, then it can't be a JUMP_FORWARD to right after - # COME_FROM - if last == len(tokens): - last -= 1 - while tokens[last-1] == 'COME_FROM' and tokens[last-2] == 'COME_FROM': - last -= 1 - if tokens[last] == 'COME_FROM' and tokens[last-1] == 'COME_FROM': - last -= 1 - if (tokens[last] == 'COME_FROM' - and tokens[last-1] == 'END_FINALLY' - and tokens[last-2] == 'POP_TOP'): - # A jump of 2 is a jump around POP_TOP, END_FINALLY which - # would indicate try/else rather than try - return (tokens[last-3].kind in frozenset(('JUMP_FORWARD', 'RETURN_VALUE')) - and (tokens[last-3] != 'JUMP_FORWARD' or tokens[last-3].attr == 2)) - + return tokens[last - 3].kind not in frozenset( + ("JUMP_FORWARD", "JUMP_BACK", "BREAK_LOOP", "RETURN_VALUE") + ) or (tokens[last - 3] == "JUMP_FORWARD" and tokens[last - 3].attr != 2) return False + + class Python26ParserSingle(Python2Parser, PythonParserSingle): pass -if __name__ == '__main__': + +if __name__ == "__main__": # Check grammar p = Python26Parser() p.check_grammar() - from uncompyle6 import PYTHON_VERSION, IS_PYPY - if PYTHON_VERSION == 2.6: + from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE + + if PYTHON_VERSION_TRIPLE[:2] == (2, 6): lhs, rhs, tokens, right_recursive, dup_rhs = p.check_sets() from uncompyle6.scanner import get_scanner - s = get_scanner(PYTHON_VERSION, 
IS_PYPY) - opcode_set = set(s.opc.opname).union(set( - """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM + + s = get_scanner(PYTHON_VERSION_TRIPLE, IS_PYPY) + opcode_set = set(s.opc.opname).union( + set( + """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LAMBDA_MARKER RETURN_LAST - """.split())) + """.split() + ) + ) remain_tokens = set(tokens) - opcode_set import re - remain_tokens = set([re.sub('_\d+$', '', t) for t in remain_tokens]) - remain_tokens = set([re.sub('_CONT$', '', t) for t in remain_tokens]) + + remain_tokens = set([re.sub(r"_\d+$", "", t) for t in remain_tokens]) + remain_tokens = set([re.sub("_CONT$", "", t) for t in remain_tokens]) remain_tokens = set(remain_tokens) - opcode_set print(remain_tokens) # print(sorted(p.rule2name.items())) diff --git a/uncompyle6/parsers/parse27.py b/uncompyle6/parsers/parse27.py index efa2011d6..5dc359e7d 100644 --- a/uncompyle6/parsers/parse27.py +++ b/uncompyle6/parsers/parse27.py @@ -1,20 +1,23 @@ -# Copyright (c) 2016-2020 Rocky Bernstein +# Copyright (c) 2016-2020, 2023-2024 Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG from xdis import next_offset + from uncompyle6.parser import PythonParserSingle, nop_func from uncompyle6.parsers.parse2 import Python2Parser from uncompyle6.parsers.reducecheck import ( aug_assign1_check, + except_handler, + for_block_check, + ifelsestmt, or_check, tryelsestmt, - except_handler, ) -class Python27Parser(Python2Parser): +class Python27Parser(Python2Parser): def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG): super(Python27Parser, self).__init__(debug_parser) self.customized = {} @@ -34,11 +37,12 @@ def p_comprehension27(self, args): dict_comp ::= LOAD_DICTCOMP MAKE_FUNCTION_0 expr GET_ITER CALL_FUNCTION_1 stmt ::= dict_comp_func + dict_comp_func ::= BUILD_MAP_0 LOAD_FAST FOR_ITER store - comp_iter JUMP_BACK RETURN_VALUE 
RETURN_LAST + comp_iter JUMP_BACK ending_return set_comp_func ::= BUILD_SET_0 LOAD_FAST FOR_ITER store comp_iter - JUMP_BACK RETURN_VALUE RETURN_LAST + JUMP_BACK ending_return comp_iter ::= comp_if_not comp_if_not ::= expr jmp_true comp_iter @@ -86,6 +90,11 @@ def p_try27(self, args): for_block ::= l_stmts_opt JUMP_BACK + # In 2.7 there is occasionally a for_block has an unusual + # form: there is a JUMP_ABSOLUTE which jumps to the second JUMP_BACK + # listed below. Both JUMP_BACKS go to the same position so the + # the JUMP_ABSOLUTE and JUMP_BACK not necessary + for_block ::= l_stmts_opt JUMP_ABSOLUTE JUMP_BACK JUMP_BACK """ def p_jump27(self, args): @@ -107,17 +116,18 @@ def p_jump27(self, args): or ::= expr_jitop expr COME_FROM and ::= expr JUMP_IF_FALSE_OR_POP expr COME_FROM - # compare_chained{1,2} is used exclusively in chained_compare - compare_chained1 ::= expr DUP_TOP ROT_THREE COMPARE_OP JUMP_IF_FALSE_OR_POP - compare_chained1 COME_FROM - compare_chained1 ::= expr DUP_TOP ROT_THREE COMPARE_OP JUMP_IF_FALSE_OR_POP - compare_chained2 COME_FROM + # compare_chained{middle,2} is used exclusively in chained_compare + compared_chained_middle ::= expr DUP_TOP ROT_THREE COMPARE_OP + JUMP_IF_FALSE_OR_POP compared_chained_middle + COME_FROM + compared_chained_middle ::= expr DUP_TOP ROT_THREE COMPARE_OP + JUMP_IF_FALSE_OR_POP compare_chained_right COME_FROM return_lambda ::= RETURN_VALUE return_lambda ::= RETURN_VALUE_LAMBDA - compare_chained2 ::= expr COMPARE_OP return_lambda - compare_chained2 ::= expr COMPARE_OP return_lambda + compare_chained_right ::= expr COMPARE_OP return_lambda + compare_chained_right ::= expr COMPARE_OP return_lambda # if_exp_true are for conditions which always evaluate true # There is dead or non-optional remnants of the condition code though, @@ -135,6 +145,7 @@ def p_jump27(self, args): def p_stmt27(self, args): """ stmt ::= ifelsestmtr + stmt ::= ifelsestmtc # assert condition assert ::= assert_expr jmp_true LOAD_ASSERT RAISE_VARARGS_1 
@@ -150,9 +161,9 @@ def p_stmt27(self, args): POP_BLOCK LOAD_CONST COME_FROM_WITH WITH_CLEANUP END_FINALLY - withasstmt ::= expr SETUP_WITH store suite_stmts_opt - POP_BLOCK LOAD_CONST COME_FROM_WITH - WITH_CLEANUP END_FINALLY + with_as ::= expr SETUP_WITH store suite_stmts_opt + POP_BLOCK LOAD_CONST COME_FROM_WITH + WITH_CLEANUP END_FINALLY whilestmt ::= SETUP_LOOP testexpr returns _come_froms POP_BLOCK COME_FROM @@ -168,11 +179,13 @@ def p_stmt27(self, args): while1stmt ::= SETUP_LOOP returns pb_come_from while1stmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK COME_FROM - whilestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK _come_froms + whilestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK + _come_froms # Should this be JUMP_BACK+ ? # JUMP_BACK should all be to the same location - whilestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK JUMP_BACK POP_BLOCK _come_froms + whilestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK + JUMP_BACK POP_BLOCK _come_froms while1elsestmt ::= SETUP_LOOP l_stmts JUMP_BACK POP_BLOCK else_suitel COME_FROM @@ -187,6 +200,7 @@ def p_stmt27(self, args): ifstmt ::= testexpr return_if_stmts COME_FROM ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD else_suite come_froms ifelsestmtc ::= testexpr c_stmts_opt JUMP_ABSOLUTE else_suitec + ifelsestmtc ::= testexpr c_stmts_opt JUMP_FORWARD else_suite come_froms ifelsestmtl ::= testexpr c_stmts_opt JUMP_BACK else_suitel ifelsestmtl ::= testexpr c_stmts_opt CONTINUE else_suitel @@ -214,26 +228,32 @@ def p_stmt27(self, args): def customize_grammar_rules(self, tokens, customize): # 2.7 changes COME_FROM to COME_FROM_FINALLY - self.remove_rules(""" + self.remove_rules( + """ while1elsestmt ::= SETUP_LOOP l_stmts JUMP_BACK else_suite COME_FROM tryfinallystmt ::= SETUP_FINALLY suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM suite_stmts_opt END_FINALLY - """) - if 'PyPy' in customize: + """ + ) + if "PyPy" in customize: # PyPy-specific customizations - self.addRule(""" + 
self.addRule( + """ return_if_stmt ::= return_expr RETURN_END_IF come_froms - """, nop_func) - + """, + nop_func, + ) super(Python27Parser, self).customize_grammar_rules(tokens, customize) # FIXME: Put more in this table self.reduce_check_table = { - # "ifelsestmt": ifelsestmt, "aug_assign1": aug_assign1_check, "except_handler": except_handler, + "for_block": for_block_check.for_block_invalid, + "ifelsestmt": ifelsestmt, + "ifelsestmtc": ifelsestmt, "or": or_check, "tryelsestmt": tryelsestmt, "tryelsestmtl": tryelsestmt, @@ -246,10 +266,13 @@ def customize_grammar_rules(self, tokens, customize): self.check_reduce["except_handler"] = "tokens" self.check_reduce["except_handler_else"] = "tokens" + self.check_reduce["for_block"] = "tokens" + self.check_reduce["or"] = "AST" self.check_reduce["raise_stmt1"] = "AST" - self.check_reduce["iflaststmtl"] = "AST" self.check_reduce["ifelsestmt"] = "AST" + self.check_reduce["ifelsestmtc"] = "AST" + self.check_reduce["iflaststmtl"] = "AST" self.check_reduce["list_if_not"] = "AST" self.check_reduce["list_if"] = "AST" self.check_reduce["comp_if"] = "AST" @@ -259,16 +282,16 @@ def customize_grammar_rules(self, tokens, customize): return def reduce_is_invalid(self, rule, ast, tokens, first, last): - invalid = super(Python27Parser, - self).reduce_is_invalid(rule, ast, - tokens, first, last) + invalid = super(Python27Parser, self).reduce_is_invalid( + rule, ast, tokens, first, last + ) lhs = rule[0] n = len(tokens) fn = self.reduce_check_table.get(lhs, None) if fn: invalid = fn(self, lhs, n, rule, ast, tokens, first, last) - last = min(last, n-1) + last = min(last, n - 1) if invalid: return invalid @@ -278,8 +301,9 @@ def reduce_is_invalid(self, rule, ast, tokens, first, last): return tokens[first].offset < jmp_false[0].attr < tokens[last].offset pass elif (rule[0], rule[1][0:5]) == ( - "if_exp", - ("expr", "jmp_false", "expr", "JUMP_ABSOLUTE", "expr")): + "if_exp", + ("expr", "jmp_false", "expr", "JUMP_ABSOLUTE", "expr"), + ): 
jmp_false = ast[1] if jmp_false[0] == "POP_JUMP_IF_FALSE": else_instr = ast[4].first_child() @@ -288,19 +312,21 @@ def reduce_is_invalid(self, rule, ast, tokens, first, last): end_offset = ast[3].attr return end_offset < tokens[last].offset pass - elif rule[0] == ("raise_stmt1"): + elif rule[0] == "raise_stmt1": return ast[0] == "expr" and ast[0][0] == "or" elif rule[0] in ("assert", "assert2"): jump_inst = ast[1][0] jump_target = jump_inst.attr - return not (last >= len(tokens) - or jump_target == tokens[last].offset - or jump_target == next_offset(ast[-1].op, ast[-1].opc, ast[-1].offset)) + return not ( + last >= len(tokens) + or jump_target == tokens[last].offset + or jump_target == next_offset(ast[-1].op, ast[-1].opc, ast[-1].offset) + ) elif rule == ("ifstmt", ("testexpr", "_ifstmts_jump")): - for i in range(last-1, last-4, -1): + for i in range(last - 1, last - 4, -1): t = tokens[i] if t == "JUMP_FORWARD": - return t.attr > tokens[min(last, len(tokens)-1)].off2int() + return t.attr > tokens[min(last, len(tokens) - 1)].off2int() elif t not in ("POP_TOP", "COME_FROM"): break pass @@ -313,11 +339,11 @@ def reduce_is_invalid(self, rule, ast, tokens, first, last): jmp_target = test[1][0].attr if last == len(tokens): last -= 1 - while (isinstance(tokens[first].offset, str) and first < last): + while isinstance(tokens[first].offset, str) and first < last: first += 1 if first == last: return True - while (first < last and isinstance(tokens[last].offset, str)): + while first < last and isinstance(tokens[last].offset, str): last -= 1 return tokens[first].off2int() < jmp_target < tokens[last].off2int() pass @@ -326,30 +352,35 @@ def reduce_is_invalid(self, rule, ast, tokens, first, last): elif rule == ("list_if_not", ("expr", "jmp_true", "list_iter")): jump_inst = ast[1][0] jump_offset = jump_inst.attr - return jump_offset > jump_inst.offset and jump_offset < tokens[last].offset + return jump_inst.offset < jump_offset < tokens[last].offset elif rule == ("list_if", 
("expr", "jmp_false", "list_iter")): jump_inst = ast[1][0] jump_offset = jump_inst.attr - return jump_offset > jump_inst.offset and jump_offset < tokens[last].offset + return jump_inst.offset < jump_offset < tokens[last].offset elif rule == ("or", ("expr", "jmp_true", "expr", "\\e_come_from_opt")): - # Test that jmp_true doesn"t jump inside the middle the "or" + # Test that jmp_true doesn't jump inside the middle the "or" # or that it jumps to the same place as the end of "and" jmp_true = ast[1][0] jmp_target = jmp_true.offset + jmp_true.attr + 3 - return not (jmp_target == tokens[last].offset or - tokens[last].pattr == jmp_true.pattr) - - elif (rule[0] == "whilestmt" and - rule[1][0:-2] == - ("SETUP_LOOP", "testexpr", "l_stmts_opt", - "JUMP_BACK", "JUMP_BACK")): + return not ( + jmp_target == tokens[last].offset + or tokens[last].pattr == jmp_true.pattr + ) + + elif rule[0] == "whilestmt" and rule[1][0:-2] == ( + "SETUP_LOOP", + "testexpr", + "l_stmts_opt", + "JUMP_BACK", + "JUMP_BACK", + ): # Make sure that the jump backs all go to the same place - i = last-1 - while (tokens[i] != "JUMP_BACK"): + i = last - 1 + while tokens[i] != "JUMP_BACK": i -= 1 - return tokens[i].attr != tokens[i-1].attr + return tokens[i].attr != tokens[i - 1].attr elif rule[0] == "if_exp_true": - return (first) > 0 and tokens[first-1] == "POP_JUMP_IF_FALSE" + return (first) > 0 and tokens[first - 1] == "POP_JUMP_IF_FALSE" return False @@ -357,26 +388,31 @@ def reduce_is_invalid(self, rule, ast, tokens, first, last): class Python27ParserSingle(Python27Parser, PythonParserSingle): pass + if __name__ == "__main__": # Check grammar p = Python27Parser() p.check_grammar() - from uncompyle6 import PYTHON_VERSION, IS_PYPY - if PYTHON_VERSION == 2.7: + from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE + + if PYTHON_VERSION_TRIPLE[:2] == (2, 7): lhs, rhs, tokens, right_recursive, dup_rhs = p.check_sets() from uncompyle6.scanner import get_scanner - s = get_scanner(PYTHON_VERSION, IS_PYPY) 
- opcode_set = set(s.opc.opname).union(set( - """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM + + s = get_scanner(PYTHON_VERSION_TRIPLE, IS_PYPY) + opcode_set = set(s.opc.opname).union( + set( + """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LAMBDA_MARKER RETURN_LAST - """.split())) + """.split() + ) + ) remain_tokens = set(tokens) - opcode_set import re - remain_tokens = set([re.sub(r"_\d+$", "", t) - for t in remain_tokens]) - remain_tokens = set([re.sub("_CONT$", "", t) - for t in remain_tokens]) + + remain_tokens = set([re.sub(r"_\d+$", "", t) for t in remain_tokens]) + remain_tokens = set([re.sub("_CONT$", "", t) for t in remain_tokens]) remain_tokens = set(remain_tokens) - opcode_set print(remain_tokens) # p.dump_grammar() diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index 19f84e718..65d669378 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2022 Rocky Bernstein +# Copyright (c) 2015-2024 Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock @@ -27,22 +27,24 @@ """ import re -from uncompyle6.scanners.tok import Token + +from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG + from uncompyle6.parser import PythonParser, PythonParserSingle, nop_func from uncompyle6.parsers.reducecheck import ( - and_check, + and_invalid, except_handler_else, ifelsestmt, - ifstmt, iflaststmt, + ifstmt, or_check, testtrue, tryelsestmtl3, tryexcept, - while1stmt + while1stmt, ) from uncompyle6.parsers.treenode import SyntaxTree -from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG +from uncompyle6.scanners.tok import Token class Python3Parser(PythonParser): @@ -65,7 +67,9 @@ def p_comprehension3(self, args): list_comp ::= BUILD_LIST_0 list_iter lc_body ::= expr LIST_APPEND - list_for ::= expr FOR_ITER store list_iter jb_or_c + list_for ::= expr_or_arg 
+ FOR_ITER + store list_iter jb_or_c # This is seen in PyPy, but possibly it appears on other Python 3? list_if ::= expr jmp_false list_iter COME_FROM @@ -77,26 +81,36 @@ def p_comprehension3(self, args): stmt ::= set_comp_func + # TODO this can be simplified + set_comp_func ::= BUILD_SET_0 LOAD_ARG FOR_ITER store comp_iter + JUMP_BACK ending_return set_comp_func ::= BUILD_SET_0 LOAD_FAST FOR_ITER store comp_iter - JUMP_BACK RETURN_VALUE RETURN_LAST - - set_comp_func ::= BUILD_SET_0 LOAD_FAST FOR_ITER store comp_iter - COME_FROM JUMP_BACK RETURN_VALUE RETURN_LAST + JUMP_BACK ending_return + set_comp_func ::= BUILD_SET_0 LOAD_ARG FOR_ITER store comp_iter + COME_FROM JUMP_BACK ending_return comp_body ::= dict_comp_body comp_body ::= set_comp_body dict_comp_body ::= expr expr MAP_ADD set_comp_body ::= expr SET_ADD + expr_or_arg ::= LOAD_ARG + expr_or_arg ::= expr # See also common Python p_list_comprehension """ def p_dict_comp3(self, args): - """" + """ " expr ::= dict_comp stmt ::= dict_comp_func + dict_comp_func ::= BUILD_MAP_0 LOAD_ARG FOR_ITER store + comp_iter JUMP_BACK RETURN_VALUE RETURN_LAST + dict_comp_func ::= BUILD_MAP_0 LOAD_ARG FOR_ITER store + comp_iter JUMP_BACK RETURN_VALUE_LAMBDA LAMBDA_MARKER dict_comp_func ::= BUILD_MAP_0 LOAD_FAST FOR_ITER store comp_iter JUMP_BACK RETURN_VALUE RETURN_LAST + dict_comp_func ::= BUILD_MAP_0 LOAD_FAST FOR_ITER store + comp_iter JUMP_BACK RETURN_VALUE_LAMBDA LAMBDA_MARKER comp_iter ::= comp_if_not comp_if_not ::= expr jmp_true comp_iter @@ -273,9 +287,9 @@ def p_grammar(self, args): POP_BLOCK LOAD_CONST COME_FROM_WITH WITH_CLEANUP END_FINALLY - withasstmt ::= expr SETUP_WITH store suite_stmts_opt - POP_BLOCK LOAD_CONST COME_FROM_WITH - WITH_CLEANUP END_FINALLY + with_as ::= expr SETUP_WITH store suite_stmts_opt + POP_BLOCK LOAD_CONST COME_FROM_WITH + WITH_CLEANUP END_FINALLY expr_jt ::= expr jmp_true expr_jitop ::= expr JUMP_IF_TRUE_OR_POP @@ -342,14 +356,15 @@ def p_jump3(self, args): # FIXME: Common with 2.7 ret_and 
::= expr JUMP_IF_FALSE_OR_POP return_expr_or_cond COME_FROM ret_or ::= expr JUMP_IF_TRUE_OR_POP return_expr_or_cond COME_FROM - if_exp_ret ::= expr POP_JUMP_IF_FALSE expr RETURN_END_IF COME_FROM return_expr_or_cond + if_exp_ret ::= expr POP_JUMP_IF_FALSE expr RETURN_END_IF COME_FROM + return_expr_or_cond - # compare_chained1 is used exclusively in chained_compare - compare_chained1 ::= expr DUP_TOP ROT_THREE COMPARE_OP JUMP_IF_FALSE_OR_POP - compare_chained1 COME_FROM - compare_chained1 ::= expr DUP_TOP ROT_THREE COMPARE_OP JUMP_IF_FALSE_OR_POP - compare_chained2 COME_FROM + # compared_chained_middle is used exclusively in chained_compare + compared_chained_middle ::= expr DUP_TOP ROT_THREE COMPARE_OP JUMP_IF_FALSE_OR_POP + compared_chained_middle COME_FROM + compared_chained_middle ::= expr DUP_TOP ROT_THREE COMPARE_OP JUMP_IF_FALSE_OR_POP + compare_chained_right COME_FROM """ def p_stmt3(self, args): @@ -415,24 +430,24 @@ def p_loop_stmt3(self, args): for ::= SETUP_LOOP expr for_iter store for_block POP_BLOCK COME_FROM_LOOP - forelsestmt ::= SETUP_LOOP expr for_iter store for_block POP_BLOCK else_suite - COME_FROM_LOOP + forelsestmt ::= SETUP_LOOP expr for_iter store for_block POP_BLOCK + else_suite COME_FROM_LOOP - forelselaststmt ::= SETUP_LOOP expr for_iter store for_block POP_BLOCK else_suitec - COME_FROM_LOOP + forelselaststmt ::= SETUP_LOOP expr for_iter store for_block POP_BLOCK + else_suitec COME_FROM_LOOP - forelselaststmtl ::= SETUP_LOOP expr for_iter store for_block POP_BLOCK else_suitel - COME_FROM_LOOP + forelselaststmtl ::= SETUP_LOOP expr for_iter store for_block POP_BLOCK + else_suitel COME_FROM_LOOP - whilestmt ::= SETUP_LOOP testexpr l_stmts_opt COME_FROM JUMP_BACK POP_BLOCK - COME_FROM_LOOP + whilestmt ::= SETUP_LOOP testexpr l_stmts_opt COME_FROM JUMP_BACK + POP_BLOCK COME_FROM_LOOP - whilestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK JUMP_BACK - COME_FROM_LOOP + whilestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK 
+ JUMP_BACK COME_FROM_LOOP whilestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK COME_FROM_LOOP - whilestmt ::= SETUP_LOOP testexpr returns POP_BLOCK + whilestmt ::= SETUP_LOOP testexpr returns POP_BLOCK COME_FROM_LOOP while1elsestmt ::= SETUP_LOOP l_stmts JUMP_BACK @@ -506,7 +521,7 @@ def custom_build_class_rule(self, opname, i, token, tokens, customize, is_pypy): expr call CALL_FUNCTION_3 - """ + """ # FIXME: I bet this can be simplified # look for next MAKE_FUNCTION for i in range(i + 1, len(tokens)): @@ -537,9 +552,9 @@ def custom_build_class_rule(self, opname, i, token, tokens, customize, is_pypy): # token found, while this one doesn't. if self.version < (3, 6): call_function = self.call_fn_name(call_fn_tok) - args_pos, args_kw = self.get_pos_kw(call_fn_tok) + pos_args_count, kw_args_count = self.get_pos_kw(call_fn_tok) rule = "build_class ::= LOAD_BUILD_CLASS mkfunc %s" "%s" % ( - ("expr " * (args_pos - 1) + ("kwarg " * args_kw)), + ("expr " * (pos_args_count - 1) + ("kwarg " * kw_args_count)), call_function, ) else: @@ -548,10 +563,10 @@ def custom_build_class_rule(self, opname, i, token, tokens, customize, is_pypy): if call_function.startswith("CALL_FUNCTION_KW"): self.addRule("classdef ::= build_class_kw store", nop_func) if is_pypy: - args_pos, args_kw = self.get_pos_kw(call_fn_tok) + pos_args_count, kw_args_count = self.get_pos_kw(call_fn_tok) rule = "build_class_kw ::= LOAD_BUILD_CLASS mkfunc %s%s%s" % ( - "expr " * (args_pos - 1), - "kwarg " * (args_kw), + "expr " * (pos_args_count - 1), + "kwarg " * (kw_args_count), call_function, ) else: @@ -577,7 +592,7 @@ def custom_classfunc_rule(self, opname, token, customize, next_token, is_pypy): classdefdeco2 ::= LOAD_BUILD_CLASS mkfunc {expr}^n-1 CALL_FUNCTION_n """ - args_pos, args_kw = self.get_pos_kw(token) + pos_args_count, kw_args_count = self.get_pos_kw(token) # Additional exprs for * and ** args: # 0 if neither @@ -586,7 +601,7 @@ def custom_classfunc_rule(self, opname, token, customize, 
next_token, is_pypy): # Yes, this computation based on instruction name is a little bit hoaky. nak = (len(opname) - len("CALL_FUNCTION")) // 3 - uniq_param = args_kw + args_pos + uniq_param = kw_args_count + pos_args_count # Note: 3.5+ have subclassed this method; so we don't handle # 'CALL_FUNCTION_VAR' or 'CALL_FUNCTION_EX' here. @@ -595,16 +610,16 @@ def custom_classfunc_rule(self, opname, token, customize, next_token, is_pypy): token.kind = self.call_fn_name(token) rule = ( "call ::= expr " - + ("pos_arg " * args_pos) - + ("kwarg " * args_kw) + + ("pos_arg " * pos_args_count) + + ("kwarg " * kw_args_count) + token.kind ) else: token.kind = self.call_fn_name(token) rule = ( "call ::= expr " - + ("pos_arg " * args_pos) - + ("kwarg " * args_kw) + + ("pos_arg " * pos_args_count) + + ("kwarg " * kw_args_count) + "expr " * nak + token.kind ) @@ -612,16 +627,20 @@ def custom_classfunc_rule(self, opname, token, customize, next_token, is_pypy): self.add_unique_rule(rule, token.kind, uniq_param, customize) if "LOAD_BUILD_CLASS" in self.seen_ops: - if next_token == "CALL_FUNCTION" and next_token.attr == 1 and args_pos > 1: + if ( + next_token == "CALL_FUNCTION" + and next_token.attr == 1 + and pos_args_count > 1 + ): rule = "classdefdeco2 ::= LOAD_BUILD_CLASS mkfunc %s%s_%d" % ( - ("expr " * (args_pos - 1)), + ("expr " * (pos_args_count - 1)), opname, - args_pos, + pos_args_count, ) self.add_unique_rule(rule, token.kind, uniq_param, customize) def add_make_function_rule(self, rule, opname, attr, customize): - """Python 3.3 added a an addtional LOAD_STR before MAKE_FUNCTION and + """Python 3.3 added a an additional LOAD_STR before MAKE_FUNCTION and this has an effect on many rules. """ if self.version >= (3, 3): @@ -687,7 +706,7 @@ def customize_grammar_rules(self, tokens, customize): # Note: BUILD_TUPLE_UNPACK_WITH_CALL gets considered by # default because it starts with BUILD. So we'll set to ignore it from # the start. 
- custom_ops_processed = set(("BUILD_TUPLE_UNPACK_WITH_CALL",)) + custom_ops_processed = {"BUILD_TUPLE_UNPACK_WITH_CALL"} # A set of instruction operation names that exist in the token stream. # We use this customize the grammar that we create. @@ -748,18 +767,43 @@ def customize_grammar_rules(self, tokens, customize): kvlist_n = "expr " * (token.attr) rule = "dict ::= %sLOAD_CONST %s" % (kvlist_n, opname) self.addRule(rule, nop_func) + + elif opname in ("BUILD_CONST_LIST", "BUILD_CONST_DICT", "BUILD_CONST_SET"): + if opname == "BUILD_CONST_DICT": + rule = ( + """ + add_consts ::= ADD_VALUE* + const_list ::= COLLECTION_START add_consts %s + dict ::= const_list + expr ::= dict + """ + % opname + ) + else: + rule = ( + """ + add_consts ::= ADD_VALUE* + const_list ::= COLLECTION_START add_consts %s + expr ::= const_list + """ + % opname + ) + self.addRule(rule, nop_func) + elif opname.startswith("BUILD_DICT_OLDER"): rule = """dict ::= COLLECTION_START key_value_pairs BUILD_DICT_OLDER key_value_pairs ::= key_value_pair+ key_value_pair ::= ADD_KEY ADD_VALUE """ self.addRule(rule, nop_func) + elif opname.startswith("BUILD_LIST_UNPACK"): v = token.attr rule = "build_list_unpack ::= %s%s" % ("expr " * v, opname) self.addRule(rule, nop_func) rule = "expr ::= build_list_unpack" self.addRule(rule, nop_func) + elif opname_base in ("BUILD_MAP", "BUILD_MAP_UNPACK"): kvlist_n = "kvlist_%s" % token.attr if opname == "BUILD_MAP_n": @@ -822,18 +866,24 @@ def customize_grammar_rules(self, tokens, customize): elif opname in ("BUILD_CONST_LIST", "BUILD_CONST_DICT", "BUILD_CONST_SET"): if opname == "BUILD_CONST_DICT": - rule = f""" + rule = ( + """ add_consts ::= ADD_VALUE* - const_list ::= COLLECTION_START add_consts {opname} + const_list ::= COLLECTION_START add_consts %s dict ::= const_list expr ::= dict """ + % opname + ) else: - rule = f""" + rule = ( + """ add_consts ::= ADD_VALUE* - const_list ::= COLLECTION_START add_consts {opname} + const_list ::= COLLECTION_START add_consts 
%s expr ::= const_list """ + % opname + ) self.addRule(rule, nop_func) elif opname_base in ( @@ -914,7 +964,6 @@ def customize_grammar_rules(self, tokens, customize): "CALL_FUNCTION_VAR_KW", ) ) or opname.startswith("CALL_FUNCTION_KW"): - if opname == "CALL_FUNCTION" and token.attr == 1: rule = """ dict_comp ::= LOAD_DICTCOMP LOAD_STR MAKE_FUNCTION_0 expr @@ -932,14 +981,14 @@ def customize_grammar_rules(self, tokens, customize): elif opname_base == "CALL_METHOD": # PyPy and Python 3.7+ only - DRY with parse2 - args_pos, args_kw = self.get_pos_kw(token) + pos_args_count, kw_args_count = self.get_pos_kw(token) # number of apply equiv arguments: nak = (len(opname_base) - len("CALL_METHOD")) // 3 rule = ( "call ::= expr " - + ("pos_arg " * args_pos) - + ("kwarg " * args_kw) + + ("pos_arg " * pos_args_count) + + ("kwarg " * kw_args_count) + "expr " * nak + opname ) @@ -1036,7 +1085,9 @@ def customize_grammar_rules(self, tokens, customize): ) custom_ops_processed.add(opname) elif opname == "LOAD_LISTCOMP": - self.add_unique_rule("expr ::= listcomp", opname, token.attr, customize) + self.add_unique_rule( + "expr ::= list_comp", opname, token.attr, customize + ) custom_ops_processed.add(opname) elif opname == "LOAD_SETCOMP": # Should this be generalized and put under MAKE_FUNCTION? 
@@ -1053,9 +1104,8 @@ def customize_grammar_rules(self, tokens, customize): # A PyPy speciality - DRY with parse3 self.addRule( """ - expr ::= attribute - attribute ::= expr LOOKUP_METHOD - """, + attribute ::= expr LOOKUP_METHOD + """, nop_func, ) custom_ops_processed.add(opname) @@ -1074,7 +1124,7 @@ def customize_grammar_rules(self, tokens, customize): """ self.addRule(rule, nop_func) - args_pos, args_kw, annotate_args = token.attr + pos_args_count, kw_args_count, annotate_args = token.attr # FIXME: Fold test into add_make_function_rule if self.version < (3, 3): @@ -1083,7 +1133,7 @@ def customize_grammar_rules(self, tokens, customize): j = 2 if self.is_pypy or (i >= j and tokens[i - j] == "LOAD_LAMBDA"): rule_pat = "lambda_body ::= %sload_closure LOAD_LAMBDA %%s%s" % ( - "pos_arg " * args_pos, + "pos_arg " * pos_args_count, opname, ) self.add_make_function_rule(rule_pat, opname, token.attr, customize) @@ -1091,7 +1141,8 @@ def customize_grammar_rules(self, tokens, customize): if has_get_iter_call_function1: rule_pat = ( "generator_exp ::= %sload_closure load_genexpr %%s%s expr " - "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * args_pos, opname) + "GET_ITER CALL_FUNCTION_1" + % ("pos_arg " * pos_args_count, opname) ) self.add_make_function_rule(rule_pat, opname, token.attr, customize) @@ -1105,9 +1156,9 @@ def customize_grammar_rules(self, tokens, customize): # and have GET_ITER CALL_FUNCTION_1 # Todo: For Pypy we need to modify this slightly rule_pat = ( - "listcomp ::= %sload_closure LOAD_LISTCOMP %%s%s expr " + "list_comp ::= %sload_closure LOAD_LISTCOMP %%s%s expr " "GET_ITER CALL_FUNCTION_1" - % ("pos_arg " * args_pos, opname) + % ("pos_arg " * pos_args_count, opname) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize @@ -1116,7 +1167,7 @@ def customize_grammar_rules(self, tokens, customize): rule_pat = ( "set_comp ::= %sload_closure LOAD_SETCOMP %%s%s expr " "GET_ITER CALL_FUNCTION_1" - % ("pos_arg " * args_pos, opname) + % ("pos_arg " * 
pos_args_count, opname) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize @@ -1127,13 +1178,13 @@ def customize_grammar_rules(self, tokens, customize): self.add_unique_rule( "dict_comp ::= %sload_closure LOAD_DICTCOMP %s " "expr GET_ITER CALL_FUNCTION_1" - % ("pos_arg " * args_pos, opname), + % ("pos_arg " * pos_args_count, opname), opname, token.attr, customize, ) - if args_kw > 0: + if kw_args_count > 0: kwargs_str = "kwargs " else: kwargs_str = "" @@ -1145,36 +1196,56 @@ def customize_grammar_rules(self, tokens, customize): "mkfunc_annotate ::= %s%s%sannotate_tuple load_closure LOAD_CODE %s" % ( kwargs_str, - "pos_arg " * args_pos, - "annotate_arg " * (annotate_args - 1), + "pos_arg " * pos_args_count, + "annotate_arg " * (annotate_args), opname, ) ) else: rule = "mkfunc ::= %s%sload_closure LOAD_CODE %s" % ( kwargs_str, - "pos_arg " * args_pos, + "pos_arg " * pos_args_count, opname, ) - elif self.version == (3, 3): + self.add_unique_rule(rule, opname, token.attr, customize) + + elif (3, 3) <= self.version < (3, 6): + # FIXME move this into version-specific custom rules. + # In fact, some of this has been done for 3.3. 
if annotate_args > 0: rule = ( "mkfunc_annotate ::= %s%s%sannotate_tuple load_closure LOAD_CODE LOAD_STR %s" % ( kwargs_str, - "pos_arg " * args_pos, - "annotate_arg " * (annotate_args - 1), + "pos_arg " * pos_args_count, + "annotate_arg " * (annotate_args), opname, ) ) else: - rule = "mkfunc ::= %s%sload_closure LOAD_CODE LOAD_STR %s" % ( - kwargs_str, - "pos_arg " * args_pos, + if self.version == (3, 3): + # 3.3 puts kwargs before pos_arg + pos_kw_tuple = ( + ("kwargs " * kw_args_count), + ("pos_arg " * pos_args_count), + ) + else: + # 3.4 and 3.5 puts pos_arg before kwargs + pos_kw_tuple = ( + "pos_arg " * (pos_args_count), + ("kwargs " * kw_args_count), + ) + rule = ( + "mkfunc ::= %s%s%s " "load_closure LOAD_CODE LOAD_STR %s" + ) % ( + pos_kw_tuple[0], + pos_kw_tuple[1], + "annotate_pair " * (annotate_args), opname, ) + self.add_unique_rule(rule, opname, token.attr, customize) - elif self.version >= (3, 4): + if self.version >= (3, 4): if not self.is_pypy: load_op = "LOAD_STR" else: @@ -1184,33 +1255,33 @@ def customize_grammar_rules(self, tokens, customize): rule = ( "mkfunc_annotate ::= %s%s%sannotate_tuple load_closure %s %s" % ( - "pos_arg " * args_pos, + "pos_arg " * pos_args_count, kwargs_str, - "annotate_arg " * (annotate_args - 1), + "annotate_arg " * (annotate_args), load_op, opname, ) ) else: rule = "mkfunc ::= %s%s load_closure LOAD_CODE %s %s" % ( - "pos_arg " * args_pos, + "pos_arg " * pos_args_count, kwargs_str, load_op, opname, ) - self.add_unique_rule(rule, opname, token.attr, customize) + self.add_unique_rule(rule, opname, token.attr, customize) - if args_kw == 0: + if kw_args_count == 0: rule = "mkfunc ::= %sload_closure load_genexpr %s" % ( - "pos_arg " * args_pos, + "pos_arg " * pos_args_count, opname, ) self.add_unique_rule(rule, opname, token.attr, customize) if self.version < (3, 4): rule = "mkfunc ::= %sload_closure LOAD_CODE %s" % ( - "expr " * args_pos, + "expr " * pos_args_count, opname, ) self.add_unique_rule(rule, opname, 
token.attr, customize) @@ -1221,10 +1292,10 @@ def customize_grammar_rules(self, tokens, customize): if self.version >= (3, 6): # The semantics of MAKE_FUNCTION in 3.6 are totally different from # before. - args_pos, args_kw, annotate_args, closure = token.attr - stack_count = args_pos + args_kw + annotate_args + pos_args_count, kw_args_count, annotate_args, closure = token.attr + stack_count = pos_args_count + kw_args_count + annotate_args if closure: - if args_pos: + if pos_args_count: rule = "lambda_body ::= %s%s%s%s" % ( "expr " * stack_count, "load_closure " * closure, @@ -1257,14 +1328,16 @@ def customize_grammar_rules(self, tokens, customize): if has_get_iter_call_function1: rule_pat = ( "generator_exp ::= %sload_genexpr %%s%s expr " - "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * args_pos, opname) + "GET_ITER CALL_FUNCTION_1" + % ("pos_arg " * pos_args_count, opname) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize ) rule_pat = ( "generator_exp ::= %sload_closure load_genexpr %%s%s expr " - "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * args_pos, opname) + "GET_ITER CALL_FUNCTION_1" + % ("pos_arg " * pos_args_count, opname) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize @@ -1277,16 +1350,16 @@ def customize_grammar_rules(self, tokens, customize): # 'exprs' in the rule above into a # tuple. 
rule_pat = ( - "listcomp ::= load_closure LOAD_LISTCOMP %%s%s " + "list_comp ::= load_closure LOAD_LISTCOMP %%s%s " "expr GET_ITER CALL_FUNCTION_1" % (opname,) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize ) rule_pat = ( - "listcomp ::= %sLOAD_LISTCOMP %%s%s expr " + "list_comp ::= %sLOAD_LISTCOMP %%s%s expr " "GET_ITER CALL_FUNCTION_1" - % ("expr " * args_pos, opname) + % ("expr " * pos_args_count, opname) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize @@ -1294,8 +1367,8 @@ def customize_grammar_rules(self, tokens, customize): if self.is_pypy or (i >= 2 and tokens[i - 2] == "LOAD_LAMBDA"): rule_pat = "lambda_body ::= %s%sLOAD_LAMBDA %%s%s" % ( - ("pos_arg " * args_pos), - ("kwarg " * args_kw), + ("pos_arg " * pos_args_count), + ("kwarg " * kw_args_count), opname, ) self.add_make_function_rule( @@ -1304,9 +1377,9 @@ def customize_grammar_rules(self, tokens, customize): continue if self.version < (3, 6): - args_pos, args_kw, annotate_args = token.attr + pos_args_count, kw_args_count, annotate_args = token.attr else: - args_pos, args_kw, annotate_args, closure = token.attr + pos_args_count, kw_args_count, annotate_args, closure = token.attr if self.version < (3, 3): j = 1 @@ -1316,7 +1389,8 @@ def customize_grammar_rules(self, tokens, customize): if has_get_iter_call_function1: rule_pat = ( "generator_exp ::= %sload_genexpr %%s%s expr " - "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * args_pos, opname) + "GET_ITER CALL_FUNCTION_1" + % ("pos_arg " * pos_args_count, opname) ) self.add_make_function_rule(rule_pat, opname, token.attr, customize) @@ -1327,8 +1401,9 @@ def customize_grammar_rules(self, tokens, customize): # and have GET_ITER CALL_FUNCTION_1 # Todo: For Pypy we need to modify this slightly rule_pat = ( - "listcomp ::= %sLOAD_LISTCOMP %%s%s expr " - "GET_ITER CALL_FUNCTION_1" % ("expr " * args_pos, opname) + "list_comp ::= %sLOAD_LISTCOMP %%s%s expr " + "GET_ITER CALL_FUNCTION_1" + % ("expr " * pos_args_count, 
opname) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize @@ -1337,13 +1412,13 @@ def customize_grammar_rules(self, tokens, customize): # FIXME: Fold test into add_make_function_rule if self.is_pypy or (i >= j and tokens[i - j] == "LOAD_LAMBDA"): rule_pat = "lambda_body ::= %s%sLOAD_LAMBDA %%s%s" % ( - ("pos_arg " * args_pos), - ("kwarg " * args_kw), + ("pos_arg " * pos_args_count), + ("kwarg " * kw_args_count), opname, ) self.add_make_function_rule(rule_pat, opname, token.attr, customize) - if args_kw == 0: + if kw_args_count == 0: kwargs = "no_kwargs" self.add_unique_rule("no_kwargs ::=", opname, token.attr, customize) else: @@ -1353,13 +1428,13 @@ def customize_grammar_rules(self, tokens, customize): # positional args after keyword args rule = "mkfunc ::= %s %s%s%s" % ( kwargs, - "pos_arg " * args_pos, + "pos_arg " * pos_args_count, "LOAD_CODE ", opname, ) self.add_unique_rule(rule, opname, token.attr, customize) rule = "mkfunc ::= %s%s%s" % ( - "pos_arg " * args_pos, + "pos_arg " * pos_args_count, "LOAD_CODE ", opname, ) @@ -1367,14 +1442,14 @@ def customize_grammar_rules(self, tokens, customize): # positional args after keyword args rule = "mkfunc ::= %s %s%s%s" % ( kwargs, - "pos_arg " * args_pos, + "pos_arg " * pos_args_count, "LOAD_CODE LOAD_STR ", opname, ) elif self.version >= (3, 6): # positional args before keyword args rule = "mkfunc ::= %s%s %s%s" % ( - "pos_arg " * args_pos, + "pos_arg " * pos_args_count, kwargs, "LOAD_CODE LOAD_STR ", opname, @@ -1382,7 +1457,7 @@ def customize_grammar_rules(self, tokens, customize): elif self.version >= (3, 4): # positional args before keyword args rule = "mkfunc ::= %s%s %s%s" % ( - "pos_arg " * args_pos, + "pos_arg " * pos_args_count, kwargs, "LOAD_CODE LOAD_STR ", opname, @@ -1390,7 +1465,7 @@ def customize_grammar_rules(self, tokens, customize): else: rule = "mkfunc ::= %s%sexpr %s" % ( kwargs, - "pos_arg " * args_pos, + "pos_arg " * pos_args_count, opname, ) self.add_unique_rule(rule, 
opname, token.attr, customize) @@ -1400,73 +1475,59 @@ def customize_grammar_rules(self, tokens, customize): rule = ( "mkfunc_annotate ::= %s%sannotate_tuple LOAD_CODE LOAD_STR %s" % ( - ("pos_arg " * (args_pos)), - ("call " * (annotate_args - 1)), + ("pos_arg " * pos_args_count), + ("call " * annotate_args), opname, ) ) self.add_unique_rule(rule, opname, token.attr, customize) rule = ( - "mkfunc_annotate ::= %s%sannotate_tuple LOAD_CODE LOAD_STR %s" - % ( - ("pos_arg " * (args_pos)), - ("annotate_arg " * (annotate_args - 1)), - opname, - ) + "mkfunc_annotate ::= %s%sannotate_tuple LOAD_CODE " + "LOAD_STR %s" + ) % ( + ("pos_arg " * pos_args_count), + ("annotate_arg " * annotate_args), + opname, ) if self.version >= (3, 3): - # Normally we remove EXTENDED_ARG from the opcodes, but in the case of - # annotated functions can use the EXTENDED_ARG tuple to signal we have an annotated function. - # Yes this is a little hacky if self.version == (3, 3): # 3.3 puts kwargs before pos_arg pos_kw_tuple = ( - ("kwargs " * args_kw), - ("pos_arg " * (args_pos)), + ("kwargs " * kw_args_count), + ("pos_arg " * pos_args_count), ) else: - # 3.4 and 3.5puts pos_arg before kwargs + # 3.4 and 3.5 puts pos_arg before kwargs pos_kw_tuple = ( - "pos_arg " * (args_pos), - ("kwargs " * args_kw), + "pos_arg " * (pos_args_count), + ("kwargs " * kw_args_count), ) rule = ( - "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE LOAD_STR EXTENDED_ARG %s" - % ( - pos_kw_tuple[0], - pos_kw_tuple[1], - ("call " * (annotate_args - 1)), - opname, - ) - ) - self.add_unique_rule(rule, opname, token.attr, customize) - rule = ( - "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE LOAD_STR EXTENDED_ARG %s" - % ( - pos_kw_tuple[0], - pos_kw_tuple[1], - ("annotate_arg " * (annotate_args - 1)), - opname, - ) + "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE " + "LOAD_STR %s" + ) % ( + pos_kw_tuple[0], + pos_kw_tuple[1], + ("annotate_arg " * annotate_args), + opname, ) else: - # See above comment about use 
of EXTENDED_ARG rule = ( - "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE EXTENDED_ARG %s" + "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE %s" % ( - ("kwargs " * args_kw), - ("pos_arg " * (args_pos)), - ("annotate_arg " * (annotate_args - 1)), + ("kwargs " * kw_args_count), + ("pos_arg " * (pos_args_count)), + ("annotate_arg " * annotate_args), opname, ) ) self.add_unique_rule(rule, opname, token.attr, customize) rule = ( - "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE EXTENDED_ARG %s" + "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE %s" % ( - ("kwargs " * args_kw), - ("pos_arg " * (args_pos)), - ("call " * (annotate_args - 1)), + ("kwargs " * kw_args_count), + ("pos_arg " * pos_args_count), + ("call " * annotate_args), opname, ) ) @@ -1559,7 +1620,7 @@ def customize_grammar_rules(self, tokens, customize): } if self.version == (3, 6): - self.reduce_check_table["and"] = and_check + self.reduce_check_table["and"] = and_invalid self.check_reduce["and"] = "AST" self.check_reduce["annotate_tuple"] = "noAST" @@ -1589,7 +1650,7 @@ def customize_grammar_rules(self, tokens, customize): def reduce_is_invalid(self, rule, ast, tokens, first, last): lhs = rule[0] n = len(tokens) - last = min(last, n-1) + last = min(last, n - 1) fn = self.reduce_check_table.get(lhs, None) if fn: if fn(self, lhs, n, rule, ast, tokens, first, last): @@ -1603,8 +1664,6 @@ def reduce_is_invalid(self, rule, ast, tokens, first, last): elif lhs == "kwarg": arg = tokens[first].attr return not (isinstance(arg, str) or isinstance(arg, unicode)) - elif lhs in ("iflaststmt", "iflaststmtl") and self.version[:2] == (3, 6): - return ifstmt(self, lhs, n, rule, ast, tokens, first, last) elif rule == ("ifstmt", ("testexpr", "_ifstmts_jump")): # FIXME: go over what's up with 3.0. 
Evetually I'd like to remove RETURN_END_IF if self.version <= (3, 0) or tokens[last] == "RETURN_END_IF": @@ -1617,13 +1676,18 @@ def reduce_is_invalid(self, rule, ast, tokens, first, last): condition_jump2 = tokens[min(last - 1, len(tokens) - 1)] # If there are two *distinct* condition jumps, they should not jump to the # same place. Otherwise we have some sort of "and"/"or". - if condition_jump2.kind.startswith("POP_JUMP_IF") and condition_jump != condition_jump2: + if ( + condition_jump2.kind.startswith("POP_JUMP_IF") + and condition_jump != condition_jump2 + ): return condition_jump.attr == condition_jump2.attr - if tokens[last] == "COME_FROM" and tokens[last].off2int() != condition_jump.attr: + if ( + tokens[last] == "COME_FROM" + and tokens[last].off2int() != condition_jump.attr + ): return False - # if condition_jump.attr < condition_jump2.off2int(): # print("XXX", first, last) # for t in range(first, last): print(tokens[t]) @@ -1645,7 +1709,6 @@ def reduce_is_invalid(self, rule, ast, tokens, first, last): < tokens[last].off2int() ) elif lhs == "while1stmt": - if while1stmt(self, lhs, n, rule, ast, tokens, first, last): return True @@ -1667,7 +1730,6 @@ def reduce_is_invalid(self, rule, ast, tokens, first, last): return True return False elif lhs == "while1elsestmt": - n = len(tokens) if last == n: # Adjust for fuzziness in parsing diff --git a/uncompyle6/parsers/parse30.py b/uncompyle6/parsers/parse30.py index 79ecbe738..b9ac15902 100644 --- a/uncompyle6/parsers/parse30.py +++ b/uncompyle6/parsers/parse30.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016-2017 Rocky Bernstein +# Copyright (c) 2016-2017, 2022-2024 Rocky Bernstein """ spark grammar differences over Python 3.1 for Python 3.0. 
""" @@ -7,20 +7,21 @@ from uncompyle6.parser import PythonParserSingle from uncompyle6.parsers.parse31 import Python31Parser -class Python30Parser(Python31Parser): +class Python30Parser(Python31Parser): def p_30(self, args): """ pt_bp ::= POP_TOP POP_BLOCK - assert ::= assert_expr jmp_true LOAD_ASSERT RAISE_VARARGS_1 COME_FROM POP_TOP - assert2 ::= assert_expr jmp_true LOAD_ASSERT expr CALL_FUNCTION_1 RAISE_VARARGS_1 - come_froms + assert ::= assert_expr jmp_true LOAD_ASSERT RAISE_VARARGS_1 + COME_FROM POP_TOP + assert2 ::= assert_expr jmp_true LOAD_ASSERT expr CALL_FUNCTION_1 + RAISE_VARARGS_1 come_froms call_stmt ::= expr _come_froms POP_TOP - return_if_lambda ::= RETURN_END_IF_LAMBDA COME_FROM POP_TOP - compare_chained2 ::= expr COMPARE_OP RETURN_END_IF_LAMBDA + return_if_lambda ::= RETURN_END_IF_LAMBDA COME_FROM POP_TOP + compare_chained_right ::= expr COMPARE_OP RETURN_END_IF_LAMBDA # FIXME: combine with parse3.2 whileTruestmt ::= SETUP_LOOP l_stmts_opt @@ -30,8 +31,8 @@ def p_30(self, args): # In many ways Python 3.0 code generation is more like Python 2.6 than # it is 2.7 or 3.1. So we have a number of 2.6ish (and before) rules below - # Specifically POP_TOP is more prevelant since there is no POP_JUMP_IF_... - # instructions + # Specifically POP_TOP is more prevalant since there is no POP_JUMP_IF_... + # instructions. 
_ifstmts_jump ::= c_stmts JUMP_FORWARD _come_froms POP_TOP COME_FROM _ifstmts_jump ::= c_stmts COME_FROM POP_TOP @@ -65,7 +66,7 @@ def p_30(self, args): iflaststmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE COME_FROM POP_TOP - withasstmt ::= expr setupwithas store suite_stmts_opt + with_as ::= expr setupwithas store suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM_FINALLY LOAD_FAST DELETE_FAST WITH_CLEANUP END_FINALLY setupwithas ::= DUP_TOP LOAD_ATTR STORE_FAST LOAD_ATTR CALL_FUNCTION_0 setup_finally @@ -73,9 +74,10 @@ def p_30(self, args): # Need to keep LOAD_FAST as index 1 set_comp_header ::= BUILD_SET_0 DUP_TOP STORE_FAST + set_comp_func ::= set_comp_header - LOAD_FAST FOR_ITER store comp_iter - JUMP_BACK POP_TOP JUMP_BACK RETURN_VALUE RETURN_LAST + LOAD_ARG FOR_ITER store comp_iter + JUMP_BACK ending_return list_comp_header ::= BUILD_LIST_0 DUP_TOP STORE_FAST list_comp ::= list_comp_header @@ -85,6 +87,11 @@ def p_30(self, args): LOAD_FAST FOR_ITER store comp_iter JUMP_BACK _come_froms POP_TOP JUMP_BACK + list_for ::= DUP_TOP STORE_FAST + expr_or_arg + FOR_ITER + store list_iter jb_or_c + set_comp ::= set_comp_header LOAD_FAST FOR_ITER store comp_iter JUMP_BACK @@ -97,6 +104,11 @@ def p_30(self, args): LOAD_FAST FOR_ITER store dict_comp_iter JUMP_BACK _come_froms POP_TOP JUMP_BACK + dict_comp_func ::= BUILD_MAP_0 + DUP_TOP STORE_FAST + LOAD_ARG FOR_ITER store + dict_comp_iter JUMP_BACK ending_return + stmt ::= try_except30 try_except30 ::= SETUP_EXCEPT suite_stmts_opt _come_froms pt_bp @@ -105,11 +117,13 @@ def p_30(self, args): # From Python 2.6 - list_iter ::= list_if JUMP_BACK - list_iter ::= list_if JUMP_BACK _come_froms POP_TOP - lc_body ::= LOAD_NAME expr LIST_APPEND - lc_body ::= LOAD_FAST expr LIST_APPEND - list_if ::= expr jmp_false_then list_iter + lc_body ::= LOAD_FAST expr LIST_APPEND + lc_body ::= LOAD_NAME expr LIST_APPEND + list_if ::= expr jmp_false_then list_iter + list_if_not ::= expr jmp_true list_iter JUMP_BACK come_froms POP_TOP + list_iter ::= 
list_if JUMP_BACK + list_iter ::= list_if JUMP_BACK _come_froms POP_TOP + ############# dict_comp_iter ::= expr expr ROT_TWO expr STORE_SUBSCR @@ -193,33 +207,38 @@ def p_30(self, args): come_froms POP_TOP POP_BLOCK COME_FROM_LOOP - # compare_chained is like x <= y <= z - compare_chained1 ::= expr DUP_TOP ROT_THREE COMPARE_OP - jmp_false compare_chained1 _come_froms - compare_chained1 ::= expr DUP_TOP ROT_THREE COMPARE_OP - jmp_false compare_chained2 _come_froms - compare_chained2 ::= expr COMPARE_OP RETURN_END_IF + # A "compare_chained" is two comparisons like x <= y <= z + compared_chained_middle ::= expr DUP_TOP ROT_THREE COMPARE_OP + jmp_false compared_chained_middle _come_froms + compared_chained_middle ::= expr DUP_TOP ROT_THREE COMPARE_OP + jmp_false compare_chained_right _come_froms + compare_chained_right ::= expr COMPARE_OP RETURN_END_IF """ - def remove_rules_30(self): - self.remove_rules(""" + self.remove_rules( + """ # The were found using grammar coverage while1stmt ::= SETUP_LOOP l_stmts COME_FROM JUMP_BACK COME_FROM_LOOP whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK COME_FROM_LOOP - whileelsestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK else_suitel COME_FROM_LOOP - whilestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK COME_FROM_LOOP - whilestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK JUMP_BACK COME_FROM_LOOP + whileelsestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK + else_suitel COME_FROM_LOOP + whilestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK + COME_FROM_LOOP + whilestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK JUMP_BACK + COME_FROM_LOOP whilestmt ::= SETUP_LOOP testexpr returns POP_TOP POP_BLOCK COME_FROM_LOOP - withasstmt ::= expr SETUP_WITH store suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM_WITH WITH_CLEANUP END_FINALLY - with ::= expr SETUP_WITH POP_TOP suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM_WITH WITH_CLEANUP END_FINALLY + with_as 
::= expr SETUP_WITH store suite_stmts_opt POP_BLOCK LOAD_CONST + COME_FROM_WITH WITH_CLEANUP END_FINALLY + with ::= expr SETUP_WITH POP_TOP suite_stmts_opt POP_BLOCK LOAD_CONST + COME_FROM_WITH WITH_CLEANUP END_FINALLY # lc_body ::= LOAD_FAST expr LIST_APPEND # lc_body ::= LOAD_NAME expr LIST_APPEND # lc_body ::= expr LIST_APPEND # list_comp ::= BUILD_LIST_0 list_iter - # list_for ::= expr FOR_ITER store list_iter jb_or_c + list_for ::= expr FOR_ITER store list_iter jb_or_c # list_if ::= expr jmp_false list_iter # list_if ::= expr jmp_false_then list_iter # list_if_not ::= expr jmp_true list_iter @@ -258,10 +277,11 @@ def remove_rules_30(self): jmp_true ::= JUMP_IF_TRUE_OR_POP POP_TOP jmp_true ::= POP_JUMP_IF_TRUE - compare_chained1 ::= expr DUP_TOP ROT_THREE COMPARE_OP JUMP_IF_FALSE_OR_POP - compare_chained1 COME_FROM - compare_chained1 ::= expr DUP_TOP ROT_THREE COMPARE_OP JUMP_IF_FALSE_OR_POP - compare_chained2 COME_FROM + compared_chained_middle ::= expr DUP_TOP ROT_THREE COMPARE_OP + JUMP_IF_FALSE_OR_POP compared_chained_middle + COME_FROM + compared_chained_middle ::= expr DUP_TOP ROT_THREE COMPARE_OP + JUMP_IF_FALSE_OR_POP compare_chained_right COME_FROM ret_or ::= expr JUMP_IF_TRUE_OR_POP return_expr_or_cond COME_FROM ret_and ::= expr JUMP_IF_FALSE_OR_POP return_expr_or_cond COME_FROM if_exp_ret ::= expr POP_JUMP_IF_FALSE expr RETURN_END_IF @@ -270,29 +290,30 @@ def remove_rules_30(self): or ::= expr JUMP_IF_TRUE_OR_POP expr COME_FROM and ::= expr JUMP_IF_TRUE_OR_POP expr COME_FROM and ::= expr JUMP_IF_FALSE_OR_POP expr COME_FROM - """) + """ + ) def customize_grammar_rules(self, tokens, customize): super(Python30Parser, self).customize_grammar_rules(tokens, customize) self.remove_rules_30() self.check_reduce["iflaststmtl"] = "AST" - self.check_reduce['ifstmt'] = "AST" + self.check_reduce["ifstmt"] = "AST" self.check_reduce["ifelsestmtc"] = "AST" self.check_reduce["ifelsestmt"] = "AST" # self.check_reduce["and"] = "stmt" return def reduce_is_invalid(self, 
rule, ast, tokens, first, last): - invalid = super(Python30Parser, - self).reduce_is_invalid(rule, ast, - tokens, first, last) + invalid = super(Python30Parser, self).reduce_is_invalid( + rule, ast, tokens, first, last + ) if invalid: return invalid lhs = rule[0] if ( - lhs in ("iflaststmtl", "ifstmt", - "ifelsestmt", "ifelsestmtc") and ast[0] == "testexpr" + lhs in ("iflaststmtl", "ifstmt", "ifelsestmt", "ifelsestmtc") + and ast[0] == "testexpr" ): testexpr = ast[0] if testexpr[0] == "testfalse": @@ -300,7 +321,10 @@ def reduce_is_invalid(self, rule, ast, tokens, first, last): if lhs == "ifelsestmtc" and ast[2] == "jump_absolute_else": jump_absolute_else = ast[2] come_from = jump_absolute_else[2] - return come_from == "COME_FROM" and come_from.attr < tokens[first].offset + return ( + come_from == "COME_FROM" + and come_from.attr < tokens[first].offset + ) pass elif lhs in ("ifelsestmt", "ifelsestmtc") and ast[2] == "jump_cf_pop": jump_cf_pop = ast[2] @@ -323,11 +347,11 @@ def reduce_is_invalid(self, rule, ast, tokens, first, last): jmp_false = testfalse[1] if last == len(tokens): last -= 1 - while (isinstance(tokens[first].offset, str) and first < last): + while isinstance(tokens[first].offset, str) and first < last: first += 1 if first == last: return True - while (first < last and isinstance(tokens[last].offset, str)): + while first < last and isinstance(tokens[last].offset, str): last -= 1 if rule[0] == "iflaststmtl": return not (jmp_false[0].attr <= tokens[last].offset) @@ -335,8 +359,9 @@ def reduce_is_invalid(self, rule, ast, tokens, first, last): jmp_false_target = jmp_false[0].attr if tokens[first].offset > jmp_false_target: return True - return ( - (jmp_false_target > tokens[last].offset) and tokens[last] != "JUMP_FORWARD") + return (jmp_false_target > tokens[last].offset) and tokens[ + last + ] != "JUMP_FORWARD" pass pass pass @@ -345,33 +370,43 @@ def reduce_is_invalid(self, rule, ast, tokens, first, last): pass + class 
Python30ParserSingle(Python30Parser, PythonParserSingle): pass -if __name__ == '__main__': + +if __name__ == "__main__": # Check grammar p = Python30Parser() p.remove_rules_30() p.check_grammar() - from uncompyle6 import PYTHON_VERSION, IS_PYPY - if PYTHON_VERSION == 3.0: + from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE + + if PYTHON_VERSION_TRIPLE[:2] == (3, 0): lhs, rhs, tokens, right_recursive, dup_rhs = p.check_sets() from uncompyle6.scanner import get_scanner - s = get_scanner(PYTHON_VERSION, IS_PYPY) - opcode_set = set(s.opc.opname).union(set( - """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM + + s = get_scanner(PYTHON_VERSION_TRIPLE, IS_PYPY) + opcode_set = set(s.opc.opname).union( + set( + """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LOAD_CLASSNAME LAMBDA_MARKER RETURN_LAST - """.split())) + """.split() + ) + ) ## FIXME: try this remain_tokens = set(tokens) - opcode_set import re - remain_tokens = set([re.sub(r'_\d+$', '', t) for t in remain_tokens]) - remain_tokens = set([re.sub('_CONT$', '', t) for t in remain_tokens]) + + remain_tokens = set([re.sub(r"_\d+$", "", t) for t in remain_tokens]) + remain_tokens = set([re.sub("_CONT$", "", t) for t in remain_tokens]) remain_tokens = set(remain_tokens) - opcode_set print(remain_tokens) import sys + if len(sys.argv) > 1: from spark_parser.spark import rule2str + for rule in sorted(p.rule2name.items()): print(rule2str(rule[0])) diff --git a/uncompyle6/parsers/parse31.py b/uncompyle6/parsers/parse31.py index 8e04fae6f..94516372c 100644 --- a/uncompyle6/parsers/parse31.py +++ b/uncompyle6/parsers/parse31.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016-2017 Rocky Bernstein +# Copyright (c) 2016-2017, 2022, 2024 Rocky Bernstein """ spark grammar differences over Python 3.2 for Python 3.1. 
""" @@ -7,8 +7,8 @@ from uncompyle6.parser import PythonParserSingle from uncompyle6.parsers.parse32 import Python32Parser -class Python31Parser(Python32Parser): +class Python31Parser(Python32Parser): def p_31(self, args): """ subscript2 ::= expr expr DUP_TOPX BINARY_SUBSCR @@ -20,10 +20,10 @@ def p_31(self, args): POP_BLOCK LOAD_CONST COME_FROM_FINALLY load delete WITH_CLEANUP END_FINALLY - # Keeps Python 3.1 withas desigator in the same position as it is in other version + # Keeps Python 3.1 "with .. as" designator in the same position as it is in other version. setupwithas31 ::= setupwithas SETUP_FINALLY load delete - withasstmt ::= expr setupwithas31 store + with_as ::= expr setupwithas31 store suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM_FINALLY load delete WITH_CLEANUP END_FINALLY @@ -32,49 +32,63 @@ def p_31(self, args): load ::= LOAD_FAST load ::= LOAD_NAME """ + def remove_rules_31(self): - self.remove_rules(""" + self.remove_rules( + """ # DUP_TOP_TWO is DUP_TOPX in 3.1 and earlier subscript2 ::= expr expr DUP_TOP_TWO BINARY_SUBSCR # The were found using grammar coverage list_if ::= expr jmp_false list_iter COME_FROM list_if_not ::= expr jmp_true list_iter COME_FROM - """) + """ + ) def customize_grammar_rules(self, tokens, customize): super(Python31Parser, self).customize_grammar_rules(tokens, customize) self.remove_rules_31() return + pass + class Python31ParserSingle(Python31Parser, PythonParserSingle): pass -if __name__ == '__main__': + +if __name__ == "__main__": # Check grammar p = Python31Parser() p.remove_rules_31() p.check_grammar() - from uncompyle6 import PYTHON_VERSION, IS_PYPY - if PYTHON_VERSION == 3.1: + from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE + + if PYTHON_VERSION_TRIPLE[:2] == (3, 1): lhs, rhs, tokens, right_recursive, dup_rhs = p.check_sets() from uncompyle6.scanner import get_scanner - s = get_scanner(PYTHON_VERSION, IS_PYPY) - opcode_set = set(s.opc.opname).union(set( - """JUMP_BACK CONTINUE RETURN_END_IF 
COME_FROM + + s = get_scanner(PYTHON_VERSION_TRIPLE, IS_PYPY) + opcode_set = set(s.opc.opname).union( + set( + """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LOAD_CLASSNAME LAMBDA_MARKER RETURN_LAST - """.split())) - ## FIXME: try this + """.split() + ) + ) + # FIXME: try this remain_tokens = set(tokens) - opcode_set import re - remain_tokens = set([re.sub(r'_\d+$', '', t) for t in remain_tokens]) - remain_tokens = set([re.sub('_CONT$', '', t) for t in remain_tokens]) + + remain_tokens = set([re.sub(r"_\d+$", "", t) for t in remain_tokens]) + remain_tokens = set([re.sub("_CONT$", "", t) for t in remain_tokens]) remain_tokens = set(remain_tokens) - opcode_set print(remain_tokens) import sys + if len(sys.argv) > 1: from spark_parser.spark import rule2str + for rule in sorted(p.rule2name.items()): print(rule2str(rule[0])) diff --git a/uncompyle6/parsers/parse32.py b/uncompyle6/parsers/parse32.py index be2629854..2e94e7295 100644 --- a/uncompyle6/parsers/parse32.py +++ b/uncompyle6/parsers/parse32.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016-2017 Rocky Bernstein +# Copyright (c) 2016-2017, 2022-2024 Rocky Bernstein """ spark grammar differences over Python 3 for Python 3.2. 
""" @@ -7,6 +7,7 @@ from uncompyle6.parser import PythonParserSingle from uncompyle6.parsers.parse3 import Python3Parser + class Python32Parser(Python3Parser): def p_30to33(self, args): """ @@ -15,14 +16,18 @@ def p_30to33(self, args): store_locals ::= LOAD_FAST STORE_LOCALS """ + def p_gen_comp32(self, args): + """ + genexpr_func ::= LOAD_ARG FOR_ITER store comp_iter JUMP_BACK + """ + def p_32to35(self, args): """ if_exp ::= expr jmp_false expr jump_forward_else expr COME_FROM - # compare_chained2 is used in a "chained_compare": x <= y <= z - # used exclusively in compare_chained - compare_chained2 ::= expr COMPARE_OP RETURN_VALUE - compare_chained2 ::= expr COMPARE_OP RETURN_VALUE_LAMBDA + # compare_chained_right is used in a "chained_compare": x <= y <= z + compare_chained_right ::= expr COMPARE_OP RETURN_VALUE + compare_chained_right ::= expr COMPARE_OP RETURN_VALUE_LAMBDA # Python < 3.5 no POP BLOCK whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK COME_FROM_LOOP @@ -53,6 +58,7 @@ def p_32to35(self, args): kv3 ::= expr expr STORE_MAP """ + pass def p_32on(self, args): @@ -63,7 +69,8 @@ def p_32on(self, args): pass def customize_grammar_rules(self, tokens, customize): - self.remove_rules(""" + self.remove_rules( + """ except_handler ::= JUMP_FORWARD COME_FROM except_stmts END_FINALLY COME_FROM except_handler ::= JUMP_FORWARD COME_FROM except_stmts END_FINALLY COME_FROM_EXCEPT except_handler ::= JUMP_FORWARD COME_FROM_EXCEPT except_stmts END_FINALLY COME_FROM_EXCEPT_CLAUSE @@ -71,17 +78,22 @@ def customize_grammar_rules(self, tokens, customize): tryelsestmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK except_handler else_suite come_from_except_clauses whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK NOP COME_FROM_LOOP whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK NOP COME_FROM_LOOP - """) + """ + ) super(Python32Parser, self).customize_grammar_rules(tokens, customize) for i, token in enumerate(tokens): opname = token.kind - if 
opname.startswith('MAKE_FUNCTION_A'): - args_pos, args_kw, annotate_args = token.attr + if opname.startswith("MAKE_FUNCTION_A"): + args_pos, _, annotate_args = token.attr # Check that there are 2 annotated params? - rule = (('mkfunc_annotate ::= %s%sannotate_tuple ' - 'LOAD_CONST LOAD_CODE EXTENDED_ARG %s') % - (('pos_arg ' * (args_pos)), - ('annotate_arg ' * (annotate_args-1)), opname)) + rule = ( + "mkfunc_annotate ::= %s%sannotate_tuple " + "LOAD_CONST LOAD_CODE EXTENDED_ARG %s" + ) % ( + ("pos_arg " * args_pos), + ("annotate_arg " * (annotate_args)), + opname, + ) self.add_unique_rule(rule, opname, token.attr, customize) pass return diff --git a/uncompyle6/parsers/parse33.py b/uncompyle6/parsers/parse33.py index 8135db87d..ce1fc6727 100644 --- a/uncompyle6/parsers/parse33.py +++ b/uncompyle6/parsers/parse33.py @@ -1,29 +1,38 @@ -# Copyright (c) 2016 Rocky Bernstein +# Copyright (c) 2016, 2024 Rocky Bernstein """ spark grammar differences over Python 3.2 for Python 3.3. """ -from __future__ import print_function from uncompyle6.parser import PythonParserSingle from uncompyle6.parsers.parse32 import Python32Parser -class Python33Parser(Python32Parser): +class Python33Parser(Python32Parser): def p_33on(self, args): """ # Python 3.3+ adds yield from. expr ::= yield_from yield_from ::= expr expr YIELD_FROM + stmt ::= genexpr_func """ def customize_grammar_rules(self, tokens, customize): - self.remove_rules(""" + self.remove_rules( + """ # 3.3+ adds POP_BLOCKS - whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK NOP COME_FROM_LOOP whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK NOP COME_FROM_LOOP - """) + whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK NOP COME_FROM_LOOP + """ + ) super(Python33Parser, self).customize_grammar_rules(tokens, customize) + + # FIXME: move 3.3 stuff out of parse3.py and put it here. 
+ # for i, token in enumerate(tokens): + # opname = token.kind + # opname_base = opname[: opname.rfind("_")] + return + class Python33ParserSingle(Python33Parser, PythonParserSingle): pass diff --git a/uncompyle6/parsers/parse34.py b/uncompyle6/parsers/parse34.py index 8eff26dd6..a381448ad 100644 --- a/uncompyle6/parsers/parse34.py +++ b/uncompyle6/parsers/parse34.py @@ -1,4 +1,4 @@ -# Copyright (c) 2017-2018 Rocky Bernstein +# Copyright (c) 2017-2018, 2022-2024 Rocky Bernstein # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -19,8 +19,8 @@ from uncompyle6.parser import PythonParserSingle from uncompyle6.parsers.parse33 import Python33Parser -class Python34Parser(Python33Parser): +class Python34Parser(Python33Parser): def p_misc34(self, args): """ expr ::= LOAD_ASSERT @@ -51,39 +51,54 @@ def p_misc34(self, args): yield_from ::= expr GET_ITER LOAD_CONST YIELD_FROM _ifstmts_jump ::= c_stmts_opt JUMP_ABSOLUTE JUMP_FORWARD COME_FROM + + genexpr_func ::= LOAD_ARG _come_froms FOR_ITER store comp_iter JUMP_BACK + + if_exp_lambda ::= expr jmp_false expr return_if_lambda return_stmt_lambda LAMBDA_MARKER + return_if_lambda ::= RETURN_END_IF_LAMBDA come_froms + return_if_stmt ::= return_expr RETURN_END_IF POP_BLOCK """ def customize_grammar_rules(self, tokens, customize): - self.remove_rules(""" + self.remove_rules( + """ yield_from ::= expr expr YIELD_FROM # 3.4.2 has this. 
3.4.4 may now # while1stmt ::= SETUP_LOOP l_stmts COME_FROM JUMP_BACK COME_FROM_LOOP - """) + """ + ) super(Python34Parser, self).customize_grammar_rules(tokens, customize) return + class Python34ParserSingle(Python34Parser, PythonParserSingle): pass -if __name__ == '__main__': +if __name__ == "__main__": # Check grammar p = Python34Parser() p.check_grammar() - from uncompyle6 import PYTHON_VERSION, IS_PYPY - if PYTHON_VERSION == 3.4: + from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE + + if PYTHON_VERSION_TRIPLE[:2] == (3, 4): lhs, rhs, tokens, right_recursive, dup_rhs = p.check_sets() from uncompyle6.scanner import get_scanner - s = get_scanner(PYTHON_VERSION, IS_PYPY) - opcode_set = set(s.opc.opname).union(set( - """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM + + s = get_scanner(PYTHON_VERSION_TRIPLE, IS_PYPY) + opcode_set = set(s.opc.opname).union( + set( + """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LOAD_CLASSNAME LAMBDA_MARKER RETURN_LAST - """.split())) + """.split() + ) + ) remain_tokens = set(tokens) - opcode_set import re - remain_tokens = set([re.sub(r'_\d+$', '', t) for t in remain_tokens]) - remain_tokens = set([re.sub('_CONT$', '', t) for t in remain_tokens]) + + remain_tokens = set([re.sub(r"_\d+$", "", t) for t in remain_tokens]) + remain_tokens = set([re.sub("_CONT$", "", t) for t in remain_tokens]) remain_tokens = set(remain_tokens) - opcode_set print(remain_tokens) # print(sorted(p.rule2name.items())) diff --git a/uncompyle6/parsers/parse35.py b/uncompyle6/parsers/parse35.py index 23d924164..929d920d5 100644 --- a/uncompyle6/parsers/parse35.py +++ b/uncompyle6/parsers/parse35.py @@ -1,15 +1,17 @@ -# Copyright (c) 2016-2017, 2019, 2021 Rocky Bernstein +# Copyright (c) 2016-2017, 2019, 2021, 2023-2024 +# Rocky Bernstein """ spark grammar differences over Python 3.4 for Python 3.5. 
""" from __future__ import print_function -from uncompyle6.parser import PythonParserSingle, nop_func from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG + +from uncompyle6.parser import PythonParserSingle, nop_func from uncompyle6.parsers.parse34 import Python34Parser -class Python35Parser(Python34Parser): +class Python35Parser(Python34Parser): def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG): super(Python35Parser, self).__init__(debug_parser) self.customized = {} @@ -55,7 +57,7 @@ def p_35on(self, args): POP_BLOCK LOAD_CONST COME_FROM_WITH WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY - withasstmt ::= expr + with_as ::= expr SETUP_WITH store suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM_WITH WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY @@ -106,15 +108,16 @@ def p_35on(self, args): # Python 3.5+ does jump optimization # In <.3.5 the below is a JUMP_FORWARD to a JUMP_ABSOLUTE. - return_if_stmt ::= return_expr RETURN_END_IF POP_BLOCK return_if_lambda ::= RETURN_END_IF_LAMBDA COME_FROM + return ::= return_expr RETURN_END_IF + jb_else ::= JUMP_BACK ELSE ifelsestmtc ::= testexpr c_stmts_opt JUMP_FORWARD else_suitec ifelsestmtl ::= testexpr c_stmts_opt jb_else else_suitel # 3.5 Has jump optimization which can route the end of an - # "if/then" back to to a loop just before an else. + # "if/then" back to a loop just before an else. 
jump_absolute_else ::= jb_else jump_absolute_else ::= CONTINUE ELSE @@ -135,40 +138,42 @@ def p_35on(self, args): """ def customize_grammar_rules(self, tokens, customize): - self.remove_rules(""" + self.remove_rules( + """ yield_from ::= expr GET_ITER LOAD_CONST YIELD_FROM yield_from ::= expr expr YIELD_FROM with ::= expr SETUP_WITH POP_TOP suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM_WITH WITH_CLEANUP END_FINALLY - withasstmt ::= expr SETUP_WITH store suite_stmts_opt + with_as ::= expr SETUP_WITH store suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM_WITH WITH_CLEANUP END_FINALLY - """) + """ + ) super(Python35Parser, self).customize_grammar_rules(tokens, customize) for i, token in enumerate(tokens): opname = token.kind - if opname == 'LOAD_ASSERT': - if 'PyPy' in customize: + if opname == "LOAD_ASSERT": + if "PyPy" in customize: rules_str = """ stmt ::= JUMP_IF_NOT_DEBUG stmts COME_FROM """ self.add_unique_doc_rules(rules_str, customize) # FIXME: I suspect this is wrong for 3.6 and 3.5, but # I haven't verified what the 3.7ish fix is - elif opname == 'BUILD_MAP_UNPACK_WITH_CALL': + elif opname == "BUILD_MAP_UNPACK_WITH_CALL": if self.version < (3, 7): self.addRule("expr ::= unmapexpr", nop_func) nargs = token.attr % 256 map_unpack_n = "map_unpack_%s" % nargs - rule = map_unpack_n + ' ::= ' + 'expr ' * (nargs) + rule = map_unpack_n + " ::= " + "expr " * (nargs) self.addRule(rule, nop_func) rule = "unmapexpr ::= %s %s" % (map_unpack_n, opname) self.addRule(rule, nop_func) - call_token = tokens[i+1] - rule = 'call ::= expr unmapexpr ' + call_token.kind + call_token = tokens[i + 1] + rule = "call ::= expr unmapexpr " + call_token.kind self.addRule(rule, nop_func) - elif opname == 'BEFORE_ASYNC_WITH' and self.version < (3, 8): + elif opname == "BEFORE_ASYNC_WITH" and self.version < (3, 8): # Some Python 3.5+ async additions rules_str = """ stmt ::= async_with_stmt @@ -199,24 +204,27 @@ def customize_grammar_rules(self, tokens, customize): async_with_post """ 
self.addRule(rules_str, nop_func) - elif opname == 'BUILD_MAP_UNPACK': - self.addRule(""" + elif opname == "BUILD_MAP_UNPACK": + self.addRule( + """ expr ::= dict_unpack dict_unpack ::= dict_comp BUILD_MAP_UNPACK - """, nop_func) + """, + nop_func, + ) - elif opname == 'SETUP_WITH': + elif opname == "SETUP_WITH": # Python 3.5+ has WITH_CLEANUP_START/FINISH rules_str = """ - with ::= expr - SETUP_WITH POP_TOP suite_stmts_opt - POP_BLOCK LOAD_CONST COME_FROM_WITH - WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY - - withasstmt ::= expr - SETUP_WITH store suite_stmts_opt - POP_BLOCK LOAD_CONST COME_FROM_WITH - WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY + with ::= expr + SETUP_WITH POP_TOP suite_stmts_opt + POP_BLOCK LOAD_CONST COME_FROM_WITH + WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY + + with_as ::= expr + SETUP_WITH store suite_stmts_opt + POP_BLOCK LOAD_CONST COME_FROM_WITH + WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY """ self.addRule(rules_str, nop_func) pass @@ -230,19 +238,24 @@ def custom_classfunc_rule(self, opname, token, customize, *args): # 1 for CALL_FUNCTION_VAR or CALL_FUNCTION_KW # 2 for * and ** args (CALL_FUNCTION_VAR_KW). # Yes, this computation based on instruction name is a little bit hoaky. 
- nak = ( len(opname)-len('CALL_FUNCTION') ) // 3 + nak = (len(opname) - len("CALL_FUNCTION")) // 3 uniq_param = args_kw + args_pos - if frozenset(('GET_AWAITABLE', 'YIELD_FROM')).issubset(self.seen_ops): - rule = ('async_call ::= expr ' + - ('pos_arg ' * args_pos) + - ('kwarg ' * args_kw) + - 'expr ' * nak + token.kind + - ' GET_AWAITABLE LOAD_CONST YIELD_FROM') + if frozenset(("GET_AWAITABLE", "YIELD_FROM")).issubset(self.seen_ops): + rule = ( + "async_call ::= expr " + + ("pos_arg " * args_pos) + + ("kwarg " * args_kw) + + "expr " * nak + + token.kind + + " GET_AWAITABLE LOAD_CONST YIELD_FROM" + ) self.add_unique_rule(rule, token.kind, uniq_param, customize) - self.add_unique_rule('expr ::= async_call', token.kind, uniq_param, customize) + self.add_unique_rule( + "expr ::= async_call", token.kind, uniq_param, customize + ) - if opname.startswith('CALL_FUNCTION_VAR'): + if opname.startswith("CALL_FUNCTION_VAR"): # Python 3.5 changes the stack position of *args. KW args come # after *args. @@ -250,43 +263,55 @@ def custom_classfunc_rule(self, opname, token, customize, *args): # CALL_FUNCTION_VAR_KW with CALL_FUNCTION_EX token.kind = self.call_fn_name(token) - if opname.endswith('KW'): - kw = 'expr ' + if opname.endswith("KW"): + kw = "expr " else: - kw = '' - rule = ('call ::= expr expr ' + - ('pos_arg ' * args_pos) + - ('kwarg ' * args_kw) + kw + token.kind) + kw = "" + rule = ( + "call ::= expr expr " + + ("pos_arg " * args_pos) + + ("kwarg " * args_kw) + + kw + + token.kind + ) - # Note: semantic actions make use of the fact of wheter "args_pos" + # Note: semantic actions make use of the fact of whether "args_pos" # zero or not in creating a template rule. 
self.add_unique_rule(rule, token.kind, args_pos, customize) else: - super(Python35Parser, self).custom_classfunc_rule(opname, token, customize, *args + super(Python35Parser, self).custom_classfunc_rule( + opname, token, customize, *args ) class Python35ParserSingle(Python35Parser, PythonParserSingle): pass -if __name__ == '__main__': + +if __name__ == "__main__": # Check grammar p = Python35Parser() p.check_grammar() - from uncompyle6 import PYTHON_VERSION, IS_PYPY - if PYTHON_VERSION == 3.5: + from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE + + if PYTHON_VERSION_TRIPLE[:2] == (3, 5): lhs, rhs, tokens, right_recursive, dup_rhs = p.check_sets() from uncompyle6.scanner import get_scanner - s = get_scanner(PYTHON_VERSION, IS_PYPY) - opcode_set = set(s.opc.opname).union(set( - """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM + + s = get_scanner(PYTHON_VERSION_TRIPLE, IS_PYPY) + opcode_set = set(s.opc.opname).union( + set( + """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LOAD_CLASSNAME LAMBDA_MARKER RETURN_LAST - """.split())) + """.split() + ) + ) remain_tokens = set(tokens) - opcode_set import re - remain_tokens = set([re.sub(r'_\d+$', '', t) for t in remain_tokens]) - remain_tokens = set([re.sub('_CONT$', '', t) for t in remain_tokens]) + + remain_tokens = set([re.sub(r"_\d+$", "", t) for t in remain_tokens]) + remain_tokens = set([re.sub("_CONT$", "", t) for t in remain_tokens]) remain_tokens = set(remain_tokens) - opcode_set print(remain_tokens) # print(sorted(p.rule2name.items())) diff --git a/uncompyle6/parsers/parse36.py b/uncompyle6/parsers/parse36.py index 8118d7191..099806952 100644 --- a/uncompyle6/parsers/parse36.py +++ b/uncompyle6/parsers/parse36.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016-2020 Rocky Bernstein +# Copyright (c) 2016-2020, 2022-2024 Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as 
published by @@ -17,19 +17,26 @@ """ from __future__ import print_function -from uncompyle6.parser import PythonParserSingle, nop_func from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG + +from uncompyle6.parser import PythonParserSingle, nop_func from uncompyle6.parsers.parse35 import Python35Parser from uncompyle6.scanners.tok import Token -class Python36Parser(Python35Parser): +class Python36Parser(Python35Parser): def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG): super(Python36Parser, self).__init__(debug_parser) self.customized = {} + def p_36_jump(self, args): + """ + # Zero or one COME_FROM + # And/or expressions have this + come_from_opt ::= COME_FROM? + """ - def p_36misc(self, args): + def p_36_misc(self, args): """sstmt ::= sstmt RETURN_LAST # long except clauses in a loop can sometimes cause a JUMP_BACK to turn into a @@ -46,13 +53,15 @@ def p_36misc(self, args): for_block ::= l_stmts_opt come_from_loops JUMP_BACK come_from_loops ::= COME_FROM_LOOP* + whilestmt ::= SETUP_LOOP testexpr l_stmts_opt + JUMP_BACK come_froms POP_BLOCK whilestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK come_froms POP_BLOCK COME_FROM_LOOP whilestmt ::= SETUP_LOOP testexpr l_stmts_opt come_froms JUMP_BACK come_froms POP_BLOCK COME_FROM_LOOP # 3.6 due to jump optimization, we sometimes add RETURN_END_IF where - # RETURN_VALUE is meant. Specifcally this can happen in + # RETURN_VALUE is meant. Specifically, this can happen in # ifelsestmt -> ...else_suite _. suite_stmts... 
(last) stmt return ::= return_expr RETURN_END_IF return ::= return_expr RETURN_VALUE COME_FROM @@ -67,6 +76,33 @@ def p_36misc(self, args): if_exp ::= expr jmp_false expr jf_cf expr COME_FROM + async_for_stmt36 ::= SETUP_LOOP expr + GET_AITER + LOAD_CONST YIELD_FROM + SETUP_EXCEPT GET_ANEXT LOAD_CONST + YIELD_FROM + store + POP_BLOCK JUMP_BACK COME_FROM_EXCEPT DUP_TOP + LOAD_GLOBAL COMPARE_OP POP_JUMP_IF_TRUE + END_FINALLY for_block + COME_FROM + POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_TOP POP_BLOCK + COME_FROM_LOOP + + async_for_stmt36 ::= SETUP_LOOP expr + GET_AITER + LOAD_CONST YIELD_FROM SETUP_EXCEPT GET_ANEXT LOAD_CONST + YIELD_FROM + store + POP_BLOCK JUMP_FORWARD COME_FROM_EXCEPT DUP_TOP + LOAD_GLOBAL COMPARE_OP POP_JUMP_IF_TRUE + END_FINALLY + COME_FROM + for_block + COME_FROM + POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_TOP POP_BLOCK + COME_FROM_LOOP + async_for_stmt ::= SETUP_LOOP expr GET_AITER LOAD_CONST YIELD_FROM SETUP_EXCEPT GET_ANEXT LOAD_CONST @@ -80,20 +116,7 @@ def p_36misc(self, args): COME_FROM_LOOP stmt ::= async_for_stmt36 - - async_for_stmt36 ::= SETUP_LOOP expr - GET_AITER - LOAD_CONST YIELD_FROM - SETUP_EXCEPT GET_ANEXT LOAD_CONST - YIELD_FROM - store - POP_BLOCK JUMP_BACK COME_FROM_EXCEPT DUP_TOP - LOAD_GLOBAL COMPARE_OP POP_JUMP_IF_TRUE - END_FINALLY for_block - COME_FROM - POP_TOP POP_TOP POP_TOP POP_EXCEPT - POP_TOP POP_BLOCK - COME_FROM_LOOP + stmt ::= async_forelse_stmt36 async_forelse_stmt ::= SETUP_LOOP expr GET_AITER @@ -107,6 +130,19 @@ def p_36misc(self, args): for_block POP_BLOCK else_suite COME_FROM_LOOP + async_forelse_stmt36 ::= SETUP_LOOP expr + GET_AITER + LOAD_CONST YIELD_FROM SETUP_EXCEPT GET_ANEXT LOAD_CONST + YIELD_FROM + store + POP_BLOCK JUMP_FORWARD COME_FROM_EXCEPT DUP_TOP + LOAD_GLOBAL COMPARE_OP POP_JUMP_IF_TRUE + END_FINALLY COME_FROM + for_block _come_froms + POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_TOP + POP_BLOCK + else_suite COME_FROM_LOOP + # Adds a COME_FROM_ASYNC_WITH over 3.5 # FIXME: remove corresponding rule 
for 3.5? @@ -157,13 +193,12 @@ def p_36misc(self, args): tryfinally_return_stmt ::= SETUP_FINALLY suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM_FINALLY - compare_chained2 ::= expr COMPARE_OP come_froms JUMP_FORWARD - + compare_chained_right ::= expr COMPARE_OP come_froms JUMP_FORWARD """ # Some of this is duplicated from parse37. Eventually we'll probably rebase from # that and then we can remove this. - def p_37conditionals(self, args): + def p_36_conditionals(self, args): """ expr ::= if_exp37 if_exp37 ::= expr expr jf_cfs expr COME_FROM @@ -175,7 +210,8 @@ def customize_grammar_rules(self, tokens, customize): # self.remove_rules(""" # """) super(Python36Parser, self).customize_grammar_rules(tokens, customize) - self.remove_rules(""" + self.remove_rules( + """ _ifstmts_jumpl ::= c_stmts_opt _ifstmts_jumpl ::= _ifstmts_jump except_handler ::= JUMP_FORWARD COME_FROM_EXCEPT except_stmts END_FINALLY COME_FROM @@ -202,26 +238,37 @@ def customize_grammar_rules(self, tokens, customize): for_block pb_ja else_suite COME_FROM_LOOP - """) - self.check_reduce['call_kw'] = 'AST' + """ + ) + self.check_reduce["call_kw"] = "AST" + + # Opcode names in the custom_ops_processed set have rules that get added + # unconditionally and the rules are constant. So they need to be done + # only once and if we see the opcode a second we don't have to consider + # adding more rules. + # + # Note: BUILD_TUPLE_UNPACK_WITH_CALL gets considered by + # default because it starts with BUILD. So we'll set to ignore it from + # the start. 
+ custom_ops_processed = set() for i, token in enumerate(tokens): opname = token.kind - if opname == 'FORMAT_VALUE': + if opname == "FORMAT_VALUE": rules_str = """ expr ::= formatted_value1 formatted_value1 ::= expr FORMAT_VALUE """ self.add_unique_doc_rules(rules_str, customize) - elif opname == 'FORMAT_VALUE_ATTR': + elif opname == "FORMAT_VALUE_ATTR": rules_str = """ expr ::= formatted_value2 formatted_value2 ::= expr expr FORMAT_VALUE_ATTR """ self.add_unique_doc_rules(rules_str, customize) - elif opname == 'MAKE_FUNCTION_CLOSURE': - if 'LOAD_DICTCOMP' in self.seen_ops: + elif opname == "MAKE_FUNCTION_CLOSURE": + if "LOAD_DICTCOMP" in self.seen_ops: # Is there something general going on here? rule = """ dict_comp ::= load_closure LOAD_DICTCOMP LOAD_STR @@ -229,7 +276,7 @@ def customize_grammar_rules(self, tokens, customize): GET_ITER CALL_FUNCTION_1 """ self.addRule(rule, nop_func) - elif 'LOAD_SETCOMP' in self.seen_ops: + elif "LOAD_SETCOMP" in self.seen_ops: rule = """ set_comp ::= load_closure LOAD_SETCOMP LOAD_STR MAKE_FUNCTION_CLOSURE expr @@ -237,7 +284,7 @@ def customize_grammar_rules(self, tokens, customize): """ self.addRule(rule, nop_func) - elif opname == 'BEFORE_ASYNC_WITH': + elif opname == "BEFORE_ASYNC_WITH": rules_str = """ stmt ::= async_with_stmt async_with_pre ::= BEFORE_ASYNC_WITH GET_AWAITABLE LOAD_CONST YIELD_FROM SETUP_ASYNC_WITH @@ -263,32 +310,207 @@ def customize_grammar_rules(self, tokens, customize): """ self.addRule(rules_str, nop_func) - elif opname.startswith('BUILD_STRING'): + elif opname.startswith("BUILD_STRING"): v = token.attr rules_str = """ expr ::= joined_str joined_str ::= %sBUILD_STRING_%d - """ % ("expr " * v, v) + """ % ( + "expr " * v, + v, + ) self.add_unique_doc_rules(rules_str, customize) - if 'FORMAT_VALUE_ATTR' in self.seen_ops: + if "FORMAT_VALUE_ATTR" in self.seen_ops: rules_str = """ formatted_value_attr ::= expr expr FORMAT_VALUE_ATTR expr BUILD_STRING expr ::= formatted_value_attr """ 
self.add_unique_doc_rules(rules_str, customize) - elif opname.startswith('BUILD_MAP_UNPACK_WITH_CALL'): + elif opname.startswith("BUILD_MAP_UNPACK_WITH_CALL"): v = token.attr - rule = 'build_map_unpack_with_call ::= %s%s' % ('expr ' * v, opname) + rule = "build_map_unpack_with_call ::= %s%s" % ("expr " * v, opname) self.addRule(rule, nop_func) - elif opname.startswith('BUILD_TUPLE_UNPACK_WITH_CALL'): + elif opname.startswith("BUILD_TUPLE_UNPACK_WITH_CALL"): v = token.attr - rule = ('build_tuple_unpack_with_call ::= ' + 'expr1024 ' * int(v//1024) + - 'expr32 ' * int((v//32) % 32) + - 'expr ' * (v % 32) + opname) + rule = ( + "build_tuple_unpack_with_call ::= " + + "expr1024 " * int(v // 1024) + + "expr32 " * int((v // 32) % 32) + + "expr " * (v % 32) + + opname + ) self.addRule(rule, nop_func) - rule = ('starred ::= %s %s' % ('expr ' * v, opname)) + rule = "starred ::= %s %s" % ("expr " * v, opname) self.addRule(rule, nop_func) - elif opname == 'SETUP_ANNOTATIONS': + elif opname == "GET_AITER": + self.addRule( + """ + expr ::= generator_exp_async + + generator_exp_async ::= load_genexpr LOAD_STR MAKE_FUNCTION_0 expr + GET_AITER LOAD_CONST YIELD_FROM CALL_FUNCTION_1 + stmt ::= genexpr_func_async + + func_async_prefix ::= _come_froms + LOAD_CONST YIELD_FROM + SETUP_EXCEPT GET_ANEXT LOAD_CONST YIELD_FROM + func_async_middle ::= POP_BLOCK JUMP_FORWARD COME_FROM_EXCEPT + DUP_TOP LOAD_GLOBAL COMPARE_OP POP_JUMP_IF_TRUE + END_FINALLY COME_FROM + genexpr_func_async ::= LOAD_ARG func_async_prefix + store func_async_middle comp_iter + JUMP_BACK + POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_TOP + + expr ::= list_comp_async + list_comp_async ::= LOAD_LISTCOMP LOAD_STR MAKE_FUNCTION_0 + expr GET_AITER + LOAD_CONST YIELD_FROM CALL_FUNCTION_1 + GET_AWAITABLE LOAD_CONST + YIELD_FROM + + expr ::= list_comp_async + list_afor2 ::= func_async_prefix + store func_async_middle list_iter + JUMP_BACK + POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_TOP + list_comp_async ::= BUILD_LIST_0 LOAD_ARG 
list_afor2 + get_aiter ::= expr GET_AITER + list_afor ::= get_aiter list_afor2 + list_iter ::= list_afor + """, + nop_func, + ) + + elif opname == "GET_AITER": + self.add_unique_doc_rules("get_aiter ::= expr GET_AITER", customize) + + if not {"MAKE_FUNCTION_0", "MAKE_FUNCTION_CLOSURE"} in self.seen_ops: + self.addRule( + """ + expr ::= dict_comp_async + expr ::= generator_exp_async + expr ::= list_comp_async + + dict_comp_async ::= LOAD_DICTCOMP + LOAD_STR + MAKE_FUNCTION_0 + get_aiter + CALL_FUNCTION_1 + + dict_comp_async ::= BUILD_MAP_0 LOAD_ARG + dict_comp_async + + func_async_middle ::= POP_BLOCK JUMP_FORWARD COME_FROM_EXCEPT + DUP_TOP LOAD_GLOBAL COMPARE_OP POP_JUMP_IF_TRUE + END_FINALLY COME_FROM + + func_async_prefix ::= _come_froms SETUP_EXCEPT GET_ANEXT LOAD_CONST YIELD_FROM + + generator_exp_async ::= load_genexpr LOAD_STR MAKE_FUNCTION_0 + get_aiter CALL_FUNCTION_1 + + genexpr_func_async ::= LOAD_ARG func_async_prefix + store func_async_middle comp_iter + JUMP_LOOP COME_FROM + POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_TOP + + # FIXME this is a workaround for probably some bug in the Earley parser + # if we use get_aiter, then list_comp_async doesn't match, and I don't + # understand why. 
+ expr_get_aiter ::= expr GET_AITER + + list_afor ::= get_aiter list_afor2 + + list_afor2 ::= func_async_prefix + store func_async_middle list_iter + JUMP_LOOP COME_FROM + POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_TOP + + list_comp_async ::= BUILD_LIST_0 LOAD_ARG list_afor2 + list_comp_async ::= LOAD_LISTCOMP LOAD_STR MAKE_FUNCTION_0 + expr_get_aiter CALL_FUNCTION_1 + GET_AWAITABLE LOAD_CONST + YIELD_FROM + + list_iter ::= list_afor + + set_comp_async ::= LOAD_SETCOMP + LOAD_STR + MAKE_FUNCTION_0 + get_aiter + CALL_FUNCTION_1 + + set_comp_async ::= LOAD_CLOSURE + BUILD_TUPLE_1 + LOAD_SETCOMP + LOAD_STR MAKE_FUNCTION_CLOSURE + get_aiter CALL_FUNCTION_1 + await + """, + nop_func, + ) + custom_ops_processed.add(opname) + + self.addRule( + """ + dict_comp_async ::= BUILD_MAP_0 LOAD_ARG + dict_comp_async + + expr ::= dict_comp_async + expr ::= generator_exp_async + expr ::= list_comp_async + expr ::= set_comp_async + + func_async_middle ::= POP_BLOCK JUMP_FORWARD COME_FROM_EXCEPT + DUP_TOP LOAD_GLOBAL COMPARE_OP POP_JUMP_IF_TRUE + END_FINALLY _come_froms + + get_aiter ::= expr GET_AITER + + list_afor ::= get_aiter list_afor2 + + list_comp_async ::= BUILD_LIST_0 LOAD_ARG list_afor2 + list_iter ::= list_afor + + + set_afor ::= get_aiter set_afor2 + set_iter ::= set_afor + set_iter ::= set_for + + set_comp_async ::= BUILD_SET_0 LOAD_ARG + set_comp_async + + """, + nop_func, + ) + custom_ops_processed.add(opname) + + elif opname == "GET_ANEXT": + self.addRule( + """ + func_async_prefix ::= _come_froms SETUP_EXCEPT GET_ANEXT LOAD_CONST YIELD_FROM + func_async_prefix ::= _come_froms SETUP_FINALLY GET_ANEXT LOAD_CONST YIELD_FROM POP_BLOCK + func_async_prefix ::= _come_froms + LOAD_CONST YIELD_FROM + SETUP_EXCEPT GET_ANEXT LOAD_CONST YIELD_FROM + func_async_middle ::= JUMP_FORWARD COME_FROM_EXCEPT + DUP_TOP LOAD_GLOBAL COMPARE_OP POP_JUMP_IF_TRUE + list_comp_async ::= BUILD_LIST_0 LOAD_ARG list_afor2 + list_afor2 ::= func_async_prefix + store list_iter + JUMP_BACK 
COME_FROM_FINALLY + END_ASYNC_FOR + list_afor2 ::= func_async_prefix + store func_async_middle list_iter + JUMP_LOOP COME_FROM + POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_TOP + """, + nop_func, + ) + custom_ops_processed.add(opname) + + elif opname == "SETUP_ANNOTATIONS": # 3.6 Variable Annotations PEP 526 # This seems to come before STORE_ANNOTATION, and doesn't # correspond to direct Python source code. @@ -304,7 +526,7 @@ def customize_grammar_rules(self, tokens, customize): """ self.addRule(rule, nop_func) # Check to combine assignment + annotation into one statement - self.check_reduce['assign'] = 'token' + self.check_reduce["assign"] = "token" elif opname == "WITH_CLEANUP_START": rules_str = """ stmt ::= with_null @@ -312,13 +534,13 @@ def customize_grammar_rules(self, tokens, customize): with_suffix ::= WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY """ self.addRule(rules_str, nop_func) - elif opname == 'SETUP_WITH': + elif opname == "SETUP_WITH": rules_str = """ with ::= expr SETUP_WITH POP_TOP suite_stmts_opt COME_FROM_WITH with_suffix # Removes POP_BLOCK LOAD_CONST from 3.6- - withasstmt ::= expr SETUP_WITH store suite_stmts_opt COME_FROM_WITH + with_as ::= expr SETUP_WITH store suite_stmts_opt COME_FROM_WITH with_suffix with ::= expr SETUP_WITH POP_TOP suite_stmts_opt POP_BLOCK BEGIN_FINALLY COME_FROM_WITH @@ -330,7 +552,6 @@ def customize_grammar_rules(self, tokens, customize): return def custom_classfunc_rule(self, opname, token, customize, next_token, is_pypy): - args_pos, args_kw = self.get_pos_kw(token) # Additional exprs for * and ** args: @@ -338,140 +559,186 @@ def custom_classfunc_rule(self, opname, token, customize, next_token, is_pypy): # 1 for CALL_FUNCTION_VAR or CALL_FUNCTION_KW # 2 for * and ** args (CALL_FUNCTION_VAR_KW). # Yes, this computation based on instruction name is a little bit hoaky. 
- nak = ( len(opname)-len('CALL_FUNCTION') ) // 3 + nak = (len(opname) - len("CALL_FUNCTION")) // 3 uniq_param = args_kw + args_pos - if frozenset(('GET_AWAITABLE', 'YIELD_FROM')).issubset(self.seen_ops): - rule = ('async_call ::= expr ' + - ('pos_arg ' * args_pos) + - ('kwarg ' * args_kw) + - 'expr ' * nak + token.kind + - ' GET_AWAITABLE LOAD_CONST YIELD_FROM') + if frozenset(("GET_AWAITABLE", "YIELD_FROM")).issubset(self.seen_ops): + rule = ( + "async_call ::= expr " + + ("pos_arg " * args_pos) + + ("kwarg " * args_kw) + + "expr " * nak + + token.kind + + " GET_AWAITABLE LOAD_CONST YIELD_FROM" + ) self.add_unique_rule(rule, token.kind, uniq_param, customize) - self.add_unique_rule('expr ::= async_call', token.kind, uniq_param, customize) + self.add_unique_rule( + "expr ::= async_call", token.kind, uniq_param, customize + ) - if opname.startswith('CALL_FUNCTION_KW'): + if opname.startswith("CALL_FUNCTION_KW"): if is_pypy: # PYPY doesn't follow CPython 3.6 CALL_FUNCTION_KW conventions - super(Python36Parser, self).custom_classfunc_rule(opname, token, customize, next_token, is_pypy) + super(Python36Parser, self).custom_classfunc_rule( + opname, token, customize, next_token, is_pypy + ) else: self.addRule("expr ::= call_kw36", nop_func) - values = 'expr ' * token.attr - rule = "call_kw36 ::= expr {values} LOAD_CONST {opname}".format(**locals()) + values = "expr " * token.attr + rule = "call_kw36 ::= expr {values} LOAD_CONST {opname}".format( + **locals() + ) self.add_unique_rule(rule, token.kind, token.attr, customize) - elif opname == 'CALL_FUNCTION_EX_KW': + elif opname == "CALL_FUNCTION_EX_KW": # Note: this doesn't exist in 3.7 and later - self.addRule("""expr ::= call_ex_kw4 + self.addRule( + """expr ::= call_ex_kw4 call_ex_kw4 ::= expr expr expr CALL_FUNCTION_EX_KW """, - nop_func) - if 'BUILD_MAP_UNPACK_WITH_CALL' in self.seen_op_basenames: - self.addRule("""expr ::= call_ex_kw + nop_func, + ) + if "BUILD_MAP_UNPACK_WITH_CALL" in self.seen_op_basenames: + 
self.addRule( + """expr ::= call_ex_kw call_ex_kw ::= expr expr build_map_unpack_with_call CALL_FUNCTION_EX_KW - """, nop_func) - if 'BUILD_TUPLE_UNPACK_WITH_CALL' in self.seen_op_basenames: + """, + nop_func, + ) + if "BUILD_TUPLE_UNPACK_WITH_CALL" in self.seen_op_basenames: # FIXME: should this be parameterized by EX value? - self.addRule("""expr ::= call_ex_kw3 + self.addRule( + """expr ::= call_ex_kw3 call_ex_kw3 ::= expr build_tuple_unpack_with_call expr CALL_FUNCTION_EX_KW - """, nop_func) - if 'BUILD_MAP_UNPACK_WITH_CALL' in self.seen_op_basenames: + """, + nop_func, + ) + if "BUILD_MAP_UNPACK_WITH_CALL" in self.seen_op_basenames: # FIXME: should this be parameterized by EX value? - self.addRule("""expr ::= call_ex_kw2 + self.addRule( + """expr ::= call_ex_kw2 call_ex_kw2 ::= expr build_tuple_unpack_with_call build_map_unpack_with_call CALL_FUNCTION_EX_KW - """, nop_func) + """, + nop_func, + ) - elif opname == 'CALL_FUNCTION_EX': - self.addRule(""" + elif opname == "CALL_FUNCTION_EX": + self.addRule( + """ expr ::= call_ex starred ::= expr call_ex ::= expr starred CALL_FUNCTION_EX - """, nop_func) + """, + nop_func, + ) if self.version >= (3, 6): - if 'BUILD_MAP_UNPACK_WITH_CALL' in self.seen_ops: - self.addRule(""" + if "BUILD_MAP_UNPACK_WITH_CALL" in self.seen_ops: + self.addRule( + """ expr ::= call_ex_kw call_ex_kw ::= expr expr build_map_unpack_with_call CALL_FUNCTION_EX - """, nop_func) - if 'BUILD_TUPLE_UNPACK_WITH_CALL' in self.seen_ops: - self.addRule(""" + """, + nop_func, + ) + if "BUILD_TUPLE_UNPACK_WITH_CALL" in self.seen_ops: + self.addRule( + """ expr ::= call_ex_kw3 call_ex_kw3 ::= expr build_tuple_unpack_with_call %s CALL_FUNCTION_EX - """ % 'expr ' * token.attr, nop_func) + """ + % "expr " + * token.attr, + nop_func, + ) pass # FIXME: Is this right? 
- self.addRule(""" + self.addRule( + """ expr ::= call_ex_kw4 call_ex_kw4 ::= expr expr expr CALL_FUNCTION_EX - """, nop_func) + """, + nop_func, + ) pass else: - super(Python36Parser, self).custom_classfunc_rule(opname, token, customize, next_token, is_pypy) + super(Python36Parser, self).custom_classfunc_rule( + opname, token, customize, next_token, is_pypy + ) def reduce_is_invalid(self, rule, ast, tokens, first, last): - invalid = super(Python36Parser, - self).reduce_is_invalid(rule, ast, - tokens, first, last) + invalid = super(Python36Parser, self).reduce_is_invalid( + rule, ast, tokens, first, last + ) if invalid: return invalid - if rule[0] == 'assign': + if rule[0] == "assign": # Try to combine assignment + annotation into one statement - if (len(tokens) >= last + 1 and - tokens[last] == 'LOAD_NAME' and - tokens[last+1] == 'STORE_ANNOTATION' and - tokens[last-1].pattr == tokens[last+1].pattr): + if ( + len(tokens) >= last + 1 + and tokens[last] == "LOAD_NAME" + and tokens[last + 1] == "STORE_ANNOTATION" + and tokens[last - 1].pattr == tokens[last + 1].pattr + ): # Will handle as ann_assign_init_value return True pass - if rule[0] == 'call_kw': + if rule[0] == "call_kw": # Make sure we don't derive call_kw nt = ast[0] while not isinstance(nt, Token): - if nt[0] == 'call_kw': + if nt[0] == "call_kw": return True nt = nt[0] pass pass return False + class Python36ParserSingle(Python36Parser, PythonParserSingle): pass -if __name__ == '__main__': + +if __name__ == "__main__": # Check grammar p = Python36Parser() p.check_grammar() - from uncompyle6 import PYTHON_VERSION, IS_PYPY - if PYTHON_VERSION == 3.6: + from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE + + if PYTHON_VERSION_TRIPLE[:2] == (3, 6): lhs, rhs, tokens, right_recursive, dup_rhs = p.check_sets() from uncompyle6.scanner import get_scanner - s = get_scanner(PYTHON_VERSION, IS_PYPY) - opcode_set = set(s.opc.opname).union(set( - """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM + + s = 
get_scanner(PYTHON_VERSION_TRIPLE, IS_PYPY) + opcode_set = set(s.opc.opname).union( + set( + """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LOAD_CLASSNAME LAMBDA_MARKER RETURN_LAST - """.split())) + """.split() + ) + ) remain_tokens = set(tokens) - opcode_set import re - remain_tokens = set([re.sub(r'_\d+$', '', t) for t in remain_tokens]) - remain_tokens = set([re.sub('_CONT$', '', t) for t in remain_tokens]) + + remain_tokens = set([re.sub(r"_\d+$", "", t) for t in remain_tokens]) + remain_tokens = set([re.sub("_CONT$", "", t) for t in remain_tokens]) remain_tokens = set(remain_tokens) - opcode_set print(remain_tokens) # print(sorted(p.rule2name.items())) diff --git a/uncompyle6/parsers/parse37.py b/uncompyle6/parsers/parse37.py index ce8dedc8d..e32e13c66 100644 --- a/uncompyle6/parsers/parse37.py +++ b/uncompyle6/parsers/parse37.py @@ -1,4 +1,4 @@ -# Copyright (c) 2017-2020, 2022 Rocky Bernstein +# Copyright (c) 2017-2020, 2022-2024 Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,10 +17,12 @@ """ from __future__ import print_function -from uncompyle6.scanners.tok import Token -from uncompyle6.parser import PythonParserSingle, nop_func from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG + +from uncompyle6.parser import PythonParserSingle, nop_func from uncompyle6.parsers.parse37base import Python37BaseParser +from uncompyle6.scanners.tok import Token + class Python37Parser(Python37BaseParser): def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG): @@ -62,6 +64,9 @@ def p_stmt(self, args): c_stmts ::= lastc_stmt c_stmts ::= continues + ending_return ::= RETURN_VALUE RETURN_LAST + ending_return ::= RETURN_VALUE_LAMBDA LAMBDA_MARKER + lastc_stmt ::= iflaststmt lastc_stmt ::= forelselaststmt lastc_stmt ::= ifelsestmtc @@ -130,7 +135,8 @@ def p_stmt(self, args): stmt ::= return return ::= 
return_expr RETURN_VALUE - # "returns" nonterminal is a sequence of statements that ends in a RETURN statement. + # "returns" nonterminal is a sequence of statements that ends in a + # RETURN statement. # In later Python versions with jump optimization, this can cause JUMPs # that would normally appear to be omitted. @@ -138,7 +144,8 @@ def p_stmt(self, args): returns ::= _stmts return stmt ::= genexpr_func - genexpr_func ::= LOAD_FAST _come_froms FOR_ITER store comp_iter JUMP_BACK + genexpr_func ::= LOAD_ARG _come_froms FOR_ITER store comp_iter + _come_froms JUMP_BACK _come_froms """ pass @@ -219,11 +226,11 @@ def p_expr(self, args): compare ::= compare_single compare_single ::= expr expr COMPARE_OP - # A compare_chained is two comparisions like x <= y <= z - compare_chained ::= expr compare_chained1 ROT_TWO POP_TOP _come_froms - compare_chained2 ::= expr COMPARE_OP JUMP_FORWARD + # A compare_chained is two comparisons like x <= y <= z + compare_chained ::= expr compared_chained_middle ROT_TWO POP_TOP _come_froms + compare_chained_right ::= expr COMPARE_OP JUMP_FORWARD - # Non-null kvlist items are broken out in the indiviual grammars + # Non-null kvlist items are broken out in the individual grammars kvlist ::= # Positional arguments in make_function @@ -244,8 +251,7 @@ def p_function_def(self, args): """ def p_generator_exp(self, args): - """ - """ + """ """ def p_jump(self, args): """ @@ -402,7 +408,7 @@ def p_list_comprehension(self, args): list_if_not ::= expr jmp_true list_iter """ - def p_set_comp(self, args): + def p_gen_comp37(self, args): """ comp_iter ::= comp_for comp_body ::= gen_comp_body @@ -438,10 +444,10 @@ def p_32on(self, args): """ if_exp::= expr jmp_false expr jump_forward_else expr COME_FROM - # compare_chained2 is used in a "chained_compare": x <= y <= z + # compare_chained_right is used in a "chained_compare": x <= y <= z # used exclusively in compare_chained - compare_chained2 ::= expr COMPARE_OP RETURN_VALUE - compare_chained2 ::= expr 
COMPARE_OP RETURN_VALUE_LAMBDA + compare_chained_right ::= expr COMPARE_OP RETURN_VALUE + compare_chained_right ::= expr COMPARE_OP RETURN_VALUE_LAMBDA # Python < 3.5 no POP BLOCK whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK COME_FROM_LOOP @@ -510,7 +516,7 @@ def p_34on(self, args): _ifstmts_jump ::= c_stmts_opt JUMP_ABSOLUTE JUMP_FORWARD _come_froms """ - def p_35on(self, args): + def p_35_on(self, args): """ while1elsestmt ::= setup_loop l_stmts JUMP_BACK @@ -553,7 +559,7 @@ def p_35on(self, args): ifelsestmtl ::= testexpr_cf c_stmts_opt jb_else else_suitel # 3.5 Has jump optimization which can route the end of an - # "if/then" back to to a loop just before an else. + # "if/then" back to a loop just before an else. jump_absolute_else ::= jb_else jump_absolute_else ::= CONTINUE ELSE @@ -568,7 +574,7 @@ def p_35on(self, args): iflaststmt ::= testexpr c_stmts_opt JUMP_FORWARD """ - def p_37async(self, args): + def p_37_async(self, args): """ stmt ::= async_for_stmt37 stmt ::= async_for_stmt @@ -590,6 +596,7 @@ def p_37async(self, args): # Order of LOAD_CONST YIELD_FROM is switched from 3.6 to save a LOAD_CONST async_for_stmt37 ::= setup_loop expr GET_AITER + _come_froms SETUP_EXCEPT GET_ANEXT LOAD_CONST YIELD_FROM store @@ -602,6 +609,7 @@ def p_37async(self, args): async_forelse_stmt ::= setup_loop expr GET_AITER + _come_froms SETUP_EXCEPT GET_ANEXT LOAD_CONST YIELD_FROM store @@ -614,7 +622,7 @@ def p_37async(self, args): else_suite COME_FROM_LOOP """ - def p_37chained(self, args): + def p_37_chained(self, args): """ testtrue ::= compare_chained37 testfalse ::= compare_chained37_false @@ -622,53 +630,55 @@ def p_37chained(self, args): compare_chained ::= compare_chained37 compare_chained ::= compare_chained37_false - compare_chained37 ::= expr compare_chained1a_37 - compare_chained37 ::= expr compare_chained1c_37 + compare_chained37 ::= expr compared_chained_middlea_37 + compare_chained37 ::= expr compared_chained_middlec_37 - compare_chained37_false ::= 
expr compare_chained1_false_37 - compare_chained37_false ::= expr compare_chained1b_false_37 - compare_chained37_false ::= expr compare_chained2_false_37 + compare_chained37_false ::= expr compared_chained_middle_false_37 + compare_chained37_false ::= expr compared_chained_middleb_false_37 + compare_chained37_false ::= expr compare_chained_right_false_37 - compare_chained1a_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP POP_JUMP_IF_FALSE - compare_chained1a_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP POP_JUMP_IF_FALSE - compare_chained2a_37 COME_FROM POP_TOP COME_FROM - compare_chained1b_false_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP POP_JUMP_IF_FALSE - compare_chained2b_false_37 POP_TOP _jump COME_FROM + compared_chained_middlea_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP POP_JUMP_IF_FALSE + compared_chained_middlea_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP POP_JUMP_IF_FALSE + compare_chained_righta_37 COME_FROM POP_TOP COME_FROM + compared_chained_middleb_false_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP POP_JUMP_IF_FALSE + compare_chained_rightb_false_37 POP_TOP _jump COME_FROM - compare_chained1c_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP POP_JUMP_IF_FALSE - compare_chained2a_37 POP_TOP + compared_chained_middlec_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP POP_JUMP_IF_FALSE + compare_chained_righta_37 POP_TOP - compare_chained1_false_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP POP_JUMP_IF_FALSE - compare_chained2c_37 POP_TOP JUMP_FORWARD COME_FROM - compare_chained1_false_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP POP_JUMP_IF_FALSE - compare_chained2b_false_37 POP_TOP _jump COME_FROM + compared_chained_middle_false_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP POP_JUMP_IF_FALSE + compare_chained_rightc_37 POP_TOP JUMP_FORWARD COME_FROM + compared_chained_middle_false_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP POP_JUMP_IF_FALSE + compare_chained_rightb_false_37 POP_TOP _jump COME_FROM - compare_chained2_false_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP POP_JUMP_IF_FALSE - 
compare_chained2a_false_37 POP_TOP JUMP_BACK COME_FROM + compare_chained_right_false_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP POP_JUMP_IF_FALSE + compare_chained_righta_false_37 POP_TOP JUMP_BACK COME_FROM - compare_chained2a_37 ::= expr COMPARE_OP come_from_opt POP_JUMP_IF_TRUE JUMP_FORWARD - compare_chained2a_37 ::= expr COMPARE_OP come_from_opt POP_JUMP_IF_TRUE JUMP_BACK - compare_chained2a_false_37 ::= expr COMPARE_OP come_from_opt POP_JUMP_IF_FALSE jf_cfs + compare_chained_righta_37 ::= expr COMPARE_OP come_from_opt POP_JUMP_IF_TRUE JUMP_FORWARD + compare_chained_righta_37 ::= expr COMPARE_OP come_from_opt POP_JUMP_IF_TRUE JUMP_BACK + compare_chained_righta_false_37 ::= expr COMPARE_OP come_from_opt POP_JUMP_IF_FALSE jf_cfs - compare_chained2b_false_37 ::= expr COMPARE_OP come_from_opt POP_JUMP_IF_FALSE JUMP_FORWARD COME_FROM - compare_chained2b_false_37 ::= expr COMPARE_OP come_from_opt POP_JUMP_IF_FALSE JUMP_FORWARD + compare_chained_rightb_false_37 ::= expr COMPARE_OP come_from_opt POP_JUMP_IF_FALSE JUMP_FORWARD COME_FROM + compare_chained_rightb_false_37 ::= expr COMPARE_OP come_from_opt POP_JUMP_IF_FALSE JUMP_FORWARD - compare_chained2c_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP come_from_opt POP_JUMP_IF_FALSE - compare_chained2a_false_37 ELSE - compare_chained2c_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP come_from_opt POP_JUMP_IF_FALSE - compare_chained2a_false_37 + compare_chained_rightc_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP come_from_opt POP_JUMP_IF_FALSE + compare_chained_righta_false_37 ELSE + compare_chained_rightc_37 ::= expr DUP_TOP ROT_THREE COMPARE_OP come_from_opt POP_JUMP_IF_FALSE + compare_chained_righta_false_37 """ - def p_37conditionals(self, args): + def p_37_conditionals(self, args): """ expr ::= if_exp37 if_exp37 ::= expr expr jf_cfs expr COME_FROM jf_cfs ::= JUMP_FORWARD _come_froms - ifelsestmt ::= testexpr c_stmts_opt jf_cfs else_suite opt_come_from_except + ifelsestmt ::= testexpr c_stmts_opt jf_cfs else_suite + opt_come_from_except 
# This is probably more realistically an "ifstmt" (with a null else) # see _cmp() of python3.8/distutils/__pycache__/version.cpython-38.opt-1.pyc - ifelsestmt ::= testexpr stmts jf_cfs else_suite_opt opt_come_from_except + ifelsestmt ::= testexpr stmts jf_cfs else_suite_opt + opt_come_from_except expr_pjit ::= expr POP_JUMP_IF_TRUE @@ -691,7 +701,8 @@ def p_37conditionals(self, args): expr ::= if_exp_37a expr ::= if_exp_37b if_exp_37a ::= and_not expr JUMP_FORWARD come_froms expr COME_FROM - if_exp_37b ::= expr jmp_false expr POP_JUMP_IF_FALSE jump_forward_else expr + if_exp_37b ::= expr jmp_false expr POP_JUMP_IF_FALSE + jump_forward_else expr jmp_false_cf ::= POP_JUMP_IF_FALSE COME_FROM comp_if ::= or jmp_false_cf comp_iter """ @@ -712,7 +723,16 @@ def p_comprehension3(self, args): list_comp ::= BUILD_LIST_0 list_iter lc_body ::= expr LIST_APPEND - list_for ::= expr for_iter store list_iter jb_or_c + + list_for ::= expr_or_arg + for_iter + store list_iter + jb_or_c _come_froms + + set_for ::= expr_or_arg + for_iter + store set_iter + jb_or_c _come_froms # This is seen in PyPy, but possibly it appears on other Python 3? 
list_if ::= expr jmp_false list_iter COME_FROM @@ -723,11 +743,11 @@ def p_comprehension3(self, args): stmt ::= set_comp_func - set_comp_func ::= BUILD_SET_0 LOAD_FAST for_iter store comp_iter - JUMP_BACK RETURN_VALUE RETURN_LAST - - set_comp_func ::= BUILD_SET_0 LOAD_FAST for_iter store comp_iter - COME_FROM JUMP_BACK RETURN_VALUE RETURN_LAST + # TODO: simplify this + set_comp_func ::= BUILD_SET_0 LOAD_ARG for_iter store comp_iter + JUMP_BACK ending_return + set_comp_func ::= BUILD_SET_0 LOAD_ARG for_iter store comp_iter + COME_FROM JUMP_BACK ending_return comp_body ::= dict_comp_body comp_body ::= set_comp_body @@ -738,16 +758,20 @@ def p_comprehension3(self, args): """ def p_dict_comp3(self, args): - """" + """ " expr ::= dict_comp stmt ::= dict_comp_func - dict_comp_func ::= BUILD_MAP_0 LOAD_FAST for_iter store - comp_iter JUMP_BACK RETURN_VALUE RETURN_LAST + + dict_comp_func ::= BUILD_MAP_0 LOAD_ARG for_iter store + comp_iter JUMP_BACK ending_return comp_iter ::= comp_if comp_iter ::= comp_if_not comp_if_not ::= expr jmp_true comp_iter comp_iter ::= comp_body + + expr_or_arg ::= LOAD_ARG + expr_or_arg ::= expr """ def p_expr3(self, args): @@ -999,11 +1023,11 @@ def p_jump3(self, args): and ::= expr jmp_false expr COME_FROM or ::= expr_jt expr COME_FROM - # compare_chained1 is used exclusively in chained_compare - compare_chained1 ::= expr DUP_TOP ROT_THREE COMPARE_OP JUMP_IF_FALSE_OR_POP - compare_chained1 COME_FROM - compare_chained1 ::= expr DUP_TOP ROT_THREE COMPARE_OP JUMP_IF_FALSE_OR_POP - compare_chained2 COME_FROM + # compared_chained_middle is used exclusively in chained_compare + compared_chained_middle ::= expr DUP_TOP ROT_THREE COMPARE_OP JUMP_IF_FALSE_OR_POP + compared_chained_middle COME_FROM + compared_chained_middle ::= expr DUP_TOP ROT_THREE COMPARE_OP JUMP_IF_FALSE_OR_POP + compare_chained_right COME_FROM """ def p_stmt3(self, args): @@ -1121,7 +1145,7 @@ def p_36misc(self, args): come_froms JUMP_BACK come_froms POP_BLOCK COME_FROM_LOOP # 3.6 
due to jump optimization, we sometimes add RETURN_END_IF where - # RETURN_VALUE is meant. Specifcally this can happen in + # RETURN_VALUE is meant. Specifically this can happen in # ifelsestmt -> ...else_suite _. suite_stmts... (last) stmt return ::= return_expr RETURN_END_IF return ::= return_expr RETURN_VALUE COME_FROM @@ -1190,10 +1214,10 @@ def p_36misc(self, args): tryfinally_return_stmt ::= SETUP_FINALLY suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM_FINALLY - compare_chained2 ::= expr COMPARE_OP come_froms JUMP_FORWARD + compare_chained_right ::= expr COMPARE_OP come_froms JUMP_FORWARD """ - def p_37misc(self, args): + def p_37_misc(self, args): """ # long except clauses in a loop can sometimes cause a JUMP_BACK to turn into a # JUMP_FORWARD to a JUMP_BACK. And when this happens there is an additional @@ -1210,6 +1234,16 @@ def customize_grammar_rules(self, tokens, customize): super(Python37Parser, self).customize_grammar_rules(tokens, customize) self.check_reduce["call_kw"] = "AST" + # Opcode names in the custom_ops_processed set have rules that get added + # unconditionally and the rules are constant. So they need to be done + # only once and if we see the opcode a second we don't have to consider + # adding more rules. + # + # Note: BUILD_TUPLE_UNPACK_WITH_CALL gets considered by + # default because it starts with BUILD. So we'll set to ignore it from + # the start. 
+ custom_ops_processed = set() + for i, token in enumerate(tokens): opname = token.kind @@ -1310,13 +1344,218 @@ def customize_grammar_rules(self, tokens, customize): self.addRule(rule, nop_func) rule = "starred ::= %s %s" % ("expr " * v, opname) self.addRule(rule, nop_func) + + elif opname == "GET_AITER": + self.add_unique_doc_rules("get_aiter ::= expr GET_AITER", customize) + + if not {"MAKE_FUNCTION_0", "MAKE_FUNCTION_CLOSURE"} in self.seen_ops: + self.addRule( + """ + expr ::= dict_comp_async + expr ::= generator_exp_async + expr ::= list_comp_async + + dict_comp_async ::= LOAD_DICTCOMP + LOAD_STR + MAKE_FUNCTION_0 + get_aiter + CALL_FUNCTION_1 + + dict_comp_async ::= BUILD_MAP_0 LOAD_ARG + dict_comp_async + + func_async_middle ::= POP_BLOCK JUMP_FORWARD COME_FROM_EXCEPT + DUP_TOP LOAD_GLOBAL COMPARE_OP POP_JUMP_IF_TRUE + END_FINALLY COME_FROM + + func_async_prefix ::= _come_froms SETUP_EXCEPT GET_ANEXT LOAD_CONST YIELD_FROM + + generator_exp_async ::= load_genexpr LOAD_STR MAKE_FUNCTION_0 + get_aiter CALL_FUNCTION_1 + + genexpr_func_async ::= LOAD_ARG func_async_prefix + store func_async_middle comp_iter + JUMP_BACK COME_FROM + POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_TOP + + # FIXME this is a workaround for probably some bug in the Earley parser + # if we use get_aiter, then list_comp_async doesn't match, and I don't + # understand why. 
+ expr_get_aiter ::= expr GET_AITER + + list_afor ::= get_aiter list_afor2 + + list_afor2 ::= func_async_prefix + store func_async_middle list_iter + JUMP_BACK COME_FROM + POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_TOP + + list_comp_async ::= BUILD_LIST_0 LOAD_ARG list_afor2 + list_comp_async ::= LOAD_LISTCOMP LOAD_STR MAKE_FUNCTION_0 + expr_get_aiter CALL_FUNCTION_1 + GET_AWAITABLE LOAD_CONST + YIELD_FROM + + list_iter ::= list_afor + + set_comp_async ::= LOAD_SETCOMP + LOAD_STR + MAKE_FUNCTION_0 + get_aiter + CALL_FUNCTION_1 + + set_comp_async ::= LOAD_CLOSURE + BUILD_TUPLE_1 + LOAD_SETCOMP + LOAD_STR MAKE_FUNCTION_CLOSURE + get_aiter CALL_FUNCTION_1 + await + """, + nop_func, + ) + custom_ops_processed.add(opname) + + self.addRule( + """ + dict_comp_async ::= BUILD_MAP_0 LOAD_ARG + dict_comp_async + + expr ::= dict_comp_async + expr ::= generator_exp_async + expr ::= list_comp_async + expr ::= set_comp_async + + func_async_middle ::= POP_BLOCK JUMP_FORWARD COME_FROM_EXCEPT + DUP_TOP LOAD_GLOBAL COMPARE_OP POP_JUMP_IF_TRUE + END_FINALLY _come_froms + + # async_iter ::= block_break SETUP_EXCEPT GET_ANEXT LOAD_CONST YIELD_FROM + + get_aiter ::= expr GET_AITER + + list_afor ::= get_aiter list_afor2 + + list_comp_async ::= BUILD_LIST_0 LOAD_ARG list_afor2 + list_iter ::= list_afor + + + set_afor ::= get_aiter set_afor2 + set_iter ::= set_afor + set_iter ::= set_for + + set_comp_async ::= BUILD_SET_0 LOAD_ARG + set_comp_async + + """, + nop_func, + ) + custom_ops_processed.add(opname) + + elif opname == "GET_ANEXT": + self.addRule( + """ + expr ::= genexpr_func_async + expr ::= BUILD_MAP_0 genexpr_func_async + expr ::= list_comp_async + + dict_comp_async ::= BUILD_MAP_0 genexpr_func_async + + async_iter ::= _come_froms + SETUP_EXCEPT GET_ANEXT LOAD_CONST YIELD_FROM + + store_async_iter_end ::= store + POP_BLOCK JUMP_FORWARD COME_FROM_EXCEPT + DUP_TOP LOAD_GLOBAL COMPARE_OP POP_JUMP_IF_TRUE + END_FINALLY COME_FROM + + # We use store_async_iter_end to make comp_iter come 
out in the right position, + # (after the logical "store") + genexpr_func_async ::= LOAD_ARG async_iter + store_async_iter_end + comp_iter + JUMP_BACK COME_FROM + POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_TOP + + list_afor2 ::= async_iter + store + list_iter + JUMP_BACK + COME_FROM_FINALLY + END_ASYNC_FOR + + list_comp_async ::= BUILD_LIST_0 LOAD_ARG list_afor2 + + set_afor2 ::= async_iter + store + func_async_middle + set_iter + JUMP_BACK COME_FROM + POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_TOP + + set_afor2 ::= expr_or_arg + set_iter_async + + set_comp_async ::= BUILD_SET_0 set_afor2 + + set_iter_async ::= async_iter + store + set_iter + JUMP_BACK + _come_froms + END_ASYNC_FOR + + return_expr_lambda ::= genexpr_func_async + LOAD_CONST RETURN_VALUE + RETURN_VALUE_LAMBDA + + return_expr_lambda ::= BUILD_SET_0 genexpr_func_async + RETURN_VALUE_LAMBDA LAMBDA_MARKER + """, + nop_func, + ) + custom_ops_processed.add(opname) + + elif opname == "GET_AWAITABLE": + rule_str = """ + await_expr ::= expr GET_AWAITABLE LOAD_CONST YIELD_FROM + expr ::= await_expr + """ + self.add_unique_doc_rules(rule_str, customize) + + elif opname == "GET_ITER": + self.addRule( + """ + expr ::= get_iter + get_iter ::= expr GET_ITER + """, + nop_func, + ) + custom_ops_processed.add(opname) + + elif opname == "LOAD_ASSERT": + if "PyPy" in customize: + rules_str = """ + stmt ::= JUMP_IF_NOT_DEBUG stmts COME_FROM + """ + self.add_unique_doc_rules(rules_str, customize) + + elif opname == "LOAD_ATTR": + self.addRule( + """ + expr ::= attribute + attribute ::= expr LOAD_ATTR + """, + nop_func, + ) + custom_ops_processed.add(opname) + elif opname == "SETUP_WITH": rules_str = """ with ::= expr SETUP_WITH POP_TOP suite_stmts_opt COME_FROM_WITH WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY # Removes POP_BLOCK LOAD_CONST from 3.6- - withasstmt ::= expr SETUP_WITH store suite_stmts_opt COME_FROM_WITH + with_as ::= expr SETUP_WITH store suite_stmts_opt COME_FROM_WITH WITH_CLEANUP_START WITH_CLEANUP_FINISH 
END_FINALLY """ if self.version < (3, 8): @@ -1337,7 +1576,6 @@ def customize_grammar_rules(self, tokens, customize): pass def custom_classfunc_rule(self, opname, token, customize, next_token): - args_pos, args_kw = self.get_pos_kw(token) # Additional exprs for * and ** args: @@ -1350,14 +1588,18 @@ def custom_classfunc_rule(self, opname, token, customize, next_token): if frozenset(("GET_AWAITABLE", "YIELD_FROM")).issubset(self.seen_ops): rule = ( - "async_call ::= expr " + """ + await ::= GET_AWAITABLE LOAD_CONST YIELD_FROM + await_expr ::= expr await + expr ::= await_expr + async_call ::= expr """ + ("pos_arg " * args_pos) + ("kwarg " * args_kw) + "expr " * nak + token.kind + " GET_AWAITABLE LOAD_CONST YIELD_FROM" ) - self.add_unique_rule(rule, token.kind, uniq_param, customize) + self.add_unique_doc_rules(rule, customize) self.add_unique_rule( "expr ::= async_call", token.kind, uniq_param, customize ) @@ -1476,6 +1718,7 @@ def reduce_is_invalid(self, rule, ast, tokens, first, last): pass return False + def info(args): # Check grammar p = Python37Parser() @@ -1506,13 +1749,13 @@ class Python37ParserSingle(Python37Parser, PythonParserSingle): # FIXME: DRY this with other parseXX.py routines p = Python37Parser() p.check_grammar() - from uncompyle6 import PYTHON_VERSION, IS_PYPY + from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE - if PYTHON_VERSION == 3.7: + if PYTHON_VERSION_TRIPLE[:2] == (3, 7): lhs, rhs, tokens, right_recursive, dup_rhs = p.check_sets() from uncompyle6.scanner import get_scanner - s = get_scanner(PYTHON_VERSION, IS_PYPY) + s = get_scanner(PYTHON_VERSION_TRIPLE, IS_PYPY) opcode_set = set(s.opc.opname).union( set( """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM diff --git a/uncompyle6/parsers/parse37base.py b/uncompyle6/parsers/parse37base.py index 96c24c716..b89560f7f 100644 --- a/uncompyle6/parsers/parse37base.py +++ b/uncompyle6/parsers/parse37base.py @@ -1,14 +1,13 @@ -# Copyright (c) 2016-2017, 2019-2020, 2022 Rocky Bernstein +# 
Copyright (c) 2016-2017, 2019-2020, 2022-2024 Rocky Bernstein """ Python 3.7 base code. We keep non-custom-generated grammar rules out of this file. """ -from uncompyle6.parser import ParserError, PythonParser, nop_func -from uncompyle6.parsers.treenode import SyntaxTree from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG from spark_parser.spark import rule2str +from uncompyle6.parser import ParserError, PythonParser, nop_func from uncompyle6.parsers.reducecheck import ( - and_check, + and_invalid, ifelsestmt, iflaststmt, ifstmt, @@ -16,9 +15,10 @@ or_check, testtrue, tryelsestmtl3, - while1stmt, while1elsestmt, + while1stmt, ) +from uncompyle6.parsers.treenode import SyntaxTree class Python37BaseParser(PythonParser): @@ -38,7 +38,7 @@ def call_fn_name(token): return "%s_0" % (token.kind) def add_make_function_rule(self, rule, opname, attr, customize): - """Python 3.3 added a an addtional LOAD_STR before MAKE_FUNCTION and + """Python 3.3 added a an additional LOAD_STR before MAKE_FUNCTION and this has an effect on many rules. """ new_rule = rule % "LOAD_STR " @@ -54,7 +54,7 @@ def custom_build_class_rule(self, opname, i, token, tokens, customize): expr call CALL_FUNCTION_3 - """ + """ # FIXME: I bet this can be simplified # look for next MAKE_FUNCTION for i in range(i + 1, len(tokens)): @@ -104,7 +104,6 @@ def custom_build_class_rule(self, opname, i, token, tokens, customize): # organization for this. For example, arrange organize by opcode base? def customize_grammar_rules(self, tokens, customize): - is_pypy = False # For a rough break out on the first word. This may @@ -139,7 +138,7 @@ def customize_grammar_rules(self, tokens, customize): # Note: BUILD_TUPLE_UNPACK_WITH_CALL gets considered by # default because it starts with BUILD. So we'll set to ignore it from # the start. 
- custom_ops_processed = set(("BUILD_TUPLE_UNPACK_WITH_CALL",)) + custom_ops_processed = {"BUILD_TUPLE_UNPACK_WITH_CALL"} # A set of instruction operation names that exist in the token stream. # We use this customize the grammar that we create. @@ -348,7 +347,6 @@ def customize_grammar_rules(self, tokens, customize): self.addRule(rule, nop_func) elif opname_base in ("BUILD_MAP", "BUILD_MAP_UNPACK"): - if opname == "BUILD_MAP_UNPACK": self.addRule( """ @@ -367,7 +365,7 @@ def customize_grammar_rules(self, tokens, customize): if opname == "BUILD_MAP_n": # PyPy sometimes has no count. Sigh. rule = ( - "dict_comp_func ::= BUILD_MAP_n LOAD_FAST for_iter store " + "dict_comp_func ::= BUILD_MAP_n LOAD_ARG for_iter store " "comp_iter JUMP_BACK RETURN_VALUE RETURN_LAST" ) self.add_unique_rule(rule, "dict_comp_func", 1, customize) @@ -431,35 +429,39 @@ def customize_grammar_rules(self, tokens, customize): "BUILD_TUPLE", "BUILD_TUPLE_UNPACK", ): - v = token.attr + collection_size = token.attr is_LOAD_CLOSURE = False if opname_base == "BUILD_TUPLE": # If is part of a "load_closure", then it is not part of a # "list". is_LOAD_CLOSURE = True - for j in range(v): + for j in range(collection_size): if tokens[i - j - 1].kind != "LOAD_CLOSURE": is_LOAD_CLOSURE = False break if is_LOAD_CLOSURE: - rule = "load_closure ::= %s%s" % (("LOAD_CLOSURE " * v), opname) + rule = "load_closure ::= %s%s" % ( + ("LOAD_CLOSURE " * collection_size), + opname, + ) self.add_unique_rule(rule, opname, token.attr, customize) - if not is_LOAD_CLOSURE or v == 0: + if not is_LOAD_CLOSURE or collection_size == 0: # We do this complicated test to speed up parsing of # pathelogically long literals, especially those over 1024. 
- build_count = token.attr - thousands = build_count // 1024 - thirty32s = (build_count // 32) % 32 + thousands = collection_size // 1024 + thirty32s = (collection_size // 32) % 32 if thirty32s > 0: rule = "expr32 ::=%s" % (" expr" * 32) - self.add_unique_rule(rule, opname_base, build_count, customize) + self.add_unique_rule( + rule, opname_base, collection_size, customize + ) pass if thousands > 0: self.add_unique_rule( "expr1024 ::=%s" % (" expr32" * 32), opname_base, - build_count, + collection_size, customize, ) pass @@ -468,7 +470,7 @@ def customize_grammar_rules(self, tokens, customize): ("%s ::= " % collection) + "expr1024 " * thousands + "expr32 " * thirty32s - + "expr " * (build_count % 32) + + "expr " * (collection_size % 32) + opname ) self.add_unique_rules(["expr ::= %s" % collection, rule], customize) @@ -478,8 +480,8 @@ def customize_grammar_rules(self, tokens, customize): if token.attr == 2: self.add_unique_rules( [ - "expr ::= build_slice2", - "build_slice2 ::= expr expr BUILD_SLICE_2", + "expr ::= slice2", + "slice2 ::= expr expr BUILD_SLICE_2", ], customize, ) @@ -489,8 +491,8 @@ def customize_grammar_rules(self, tokens, customize): ) self.add_unique_rules( [ - "expr ::= build_slice3", - "build_slice3 ::= expr expr expr BUILD_SLICE_3", + "expr ::= slice3", + "slice3 ::= expr expr expr BUILD_SLICE_3", ], customize, ) @@ -521,9 +523,9 @@ def customize_grammar_rules(self, tokens, customize): "CALL_FUNCTION_VAR_KW", ) ) or opname.startswith("CALL_FUNCTION_KW"): - if opname == "CALL_FUNCTION" and token.attr == 1: rule = """ + expr ::= dict_comp dict_comp ::= LOAD_DICTCOMP LOAD_STR MAKE_FUNCTION_0 expr GET_ITER CALL_FUNCTION_1 classdefdeco1 ::= expr classdefdeco2 CALL_FUNCTION_1 @@ -558,11 +560,12 @@ def customize_grammar_rules(self, tokens, customize): nak = (len(opname_base) - len("CALL_METHOD")) // 3 rule = ( "call ::= expr " - + ("expr " * args_pos) + + ("pos_arg " * args_pos) + ("kwarg " * args_kw) + "expr " * nak + opname ) + 
self.add_unique_rule(rule, opname, token.attr, customize) elif opname == "CONTINUE": @@ -644,7 +647,7 @@ def customize_grammar_rules(self, tokens, customize): func_async_middle ::= POP_BLOCK JUMP_FORWARD COME_FROM_EXCEPT DUP_TOP LOAD_GLOBAL COMPARE_OP POP_JUMP_IF_TRUE END_FINALLY COME_FROM - genexpr_func_async ::= LOAD_FAST func_async_prefix + genexpr_func_async ::= LOAD_ARG func_async_prefix store func_async_middle comp_iter JUMP_BACK COME_FROM POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_TOP @@ -660,7 +663,7 @@ def customize_grammar_rules(self, tokens, customize): store func_async_middle list_iter JUMP_BACK COME_FROM POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_TOP - list_comp_async ::= BUILD_LIST_0 LOAD_FAST list_afor2 + list_comp_async ::= BUILD_LIST_0 LOAD_ARG list_afor2 get_aiter ::= expr GET_AITER list_afor ::= get_aiter list_afor2 list_iter ::= list_afor @@ -714,7 +717,9 @@ def customize_grammar_rules(self, tokens, customize): ) custom_ops_processed.add(opname) elif opname == "LOAD_LISTCOMP": - self.add_unique_rule("expr ::= listcomp", opname, token.attr, customize) + self.add_unique_rule( + "expr ::= list_comp", opname, token.attr, customize + ) custom_ops_processed.add(opname) elif opname == "LOAD_NAME": if ( @@ -793,7 +798,7 @@ def customize_grammar_rules(self, tokens, customize): # and have GET_ITER CALL_FUNCTION_1 # Todo: For Pypy we need to modify this slightly rule_pat = ( - "listcomp ::= %sload_closure LOAD_LISTCOMP %%s%s expr " + "list_comp ::= %sload_closure LOAD_LISTCOMP %%s%s expr " "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * args_pos, opname) ) @@ -891,14 +896,14 @@ def customize_grammar_rules(self, tokens, customize): # 'exprs' in the rule above into a # tuple. 
rule_pat = ( - "listcomp ::= load_closure LOAD_LISTCOMP %%s%s " + "list_comp ::= load_closure LOAD_LISTCOMP %%s%s " "expr GET_ITER CALL_FUNCTION_1" % (opname,) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize ) rule_pat = ( - "listcomp ::= %sLOAD_LISTCOMP %%s%s expr " + "list_comp ::= %sLOAD_LISTCOMP %%s%s expr " "GET_ITER CALL_FUNCTION_1" % ("expr " * args_pos, opname) ) self.add_make_function_rule( @@ -932,7 +937,7 @@ def customize_grammar_rules(self, tokens, customize): # and have GET_ITER CALL_FUNCTION_1 # Todo: For Pypy we need to modify this slightly rule_pat = ( - "listcomp ::= %sLOAD_LISTCOMP %%s%s expr " + "list_comp ::= %sLOAD_LISTCOMP %%s%s expr " "GET_ITER CALL_FUNCTION_1" % ("expr " * args_pos, opname) ) self.add_make_function_rule( @@ -1050,14 +1055,14 @@ def customize_grammar_rules(self, tokens, customize): elif opname == "SETUP_WITH": rules_str = """ stmt ::= with - stmt ::= withasstmt + stmt ::= with_as with ::= expr SETUP_WITH POP_TOP suite_stmts_opt COME_FROM_WITH with_suffix - withasstmt ::= expr SETUP_WITH store suite_stmts_opt COME_FROM_WITH + with_as ::= expr SETUP_WITH store suite_stmts_opt COME_FROM_WITH with_suffix with ::= expr @@ -1066,7 +1071,7 @@ def customize_grammar_rules(self, tokens, customize): POP_BLOCK LOAD_CONST COME_FROM_WITH with_suffix - withasstmt ::= expr + with_as ::= expr SETUP_WITH store suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM_WITH with_suffix @@ -1075,7 +1080,7 @@ def customize_grammar_rules(self, tokens, customize): SETUP_WITH POP_TOP suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM_WITH with_suffix - withasstmt ::= expr + with_as ::= expr SETUP_WITH store suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM_WITH with_suffix @@ -1093,17 +1098,18 @@ def customize_grammar_rules(self, tokens, customize): POP_BLOCK LOAD_CONST COME_FROM_WITH with_suffix - withasstmt ::= expr - SETUP_WITH store suite_stmts_opt - POP_BLOCK LOAD_CONST COME_FROM_WITH - - withasstmt ::= expr - SETUP_WITH store suite_stmts - 
POP_BLOCK BEGIN_FINALLY COME_FROM_WITH with_suffix with ::= expr SETUP_WITH POP_TOP suite_stmts_opt POP_BLOCK BEGIN_FINALLY COME_FROM_WITH with_suffix + + with_as ::= expr + SETUP_WITH store suite_stmts_opt + POP_BLOCK LOAD_CONST COME_FROM_WITH + + with_as ::= expr + SETUP_WITH store suite_stmts + POP_BLOCK BEGIN_FINALLY COME_FROM_WITH with_suffix """ self.addRule(rules_str, nop_func) @@ -1128,7 +1134,7 @@ def customize_grammar_rules(self, tokens, customize): self.reduce_check_table = { "_ifstmts_jump": ifstmts_jump, - "and": and_check, + "and": and_invalid, "ifelsestmt": ifelsestmt, "ifelsestmtl": ifelsestmt, "iflaststmt": iflaststmt, @@ -1252,20 +1258,14 @@ def reduce_is_invalid(self, rule, ast, tokens, first, last): try: if fn: return fn(self, lhs, n, rule, ast, tokens, first, last) - except: - import sys, traceback + except Exception: + import sys + import traceback print( - ("Exception in %s %s\n" - + "rule: %s\n" - + "offsets %s .. %s") - % ( - fn.__name__, - sys.exc_info()[1], - rule2str(rule), - tokens[first].offset, - tokens[last].offset, - ) + f"Exception in {fn.__name__} {sys.exc_info()[1]}\n" + + f"rule: {rule2str(rule)}\n" + + f"offsets {tokens[first].offset} .. 
{tokens[last].offset}" ) print(traceback.print_tb(sys.exc_info()[2], -1)) raise ParserError(tokens[last], tokens[last].off2int(), self.debug["rules"]) diff --git a/uncompyle6/parsers/parse38.py b/uncompyle6/parsers/parse38.py index 5cd8bc56c..4ecb5e59a 100644 --- a/uncompyle6/parsers/parse38.py +++ b/uncompyle6/parsers/parse38.py @@ -1,4 +1,4 @@ -# Copyright (c) 2017-2020 Rocky Bernstein +# Copyright (c) 2017-2020, 2022-2024 Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,11 +17,274 @@ """ from __future__ import print_function -from uncompyle6.parser import PythonParserSingle from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG + +from uncompyle6.parser import PythonParserSingle, nop_func from uncompyle6.parsers.parse37 import Python37Parser +from uncompyle6.parsers.reducecheck.pop_return import pop_return_check + class Python38Parser(Python37Parser): + def p_38_stmt(self, args): + """ + stmt ::= async_for_stmt38 + stmt ::= async_forelse_stmt38 + stmt ::= call_stmt + stmt ::= continue + stmt ::= for38 + stmt ::= forelselaststmt38 + stmt ::= forelselaststmtl38 + stmt ::= forelsestmt38 + stmt ::= try_elsestmtl38 + stmt ::= try_except38 + stmt ::= try_except38r + stmt ::= try_except38r2 + stmt ::= try_except38r3 + stmt ::= try_except38r4 + stmt ::= try_except_as + stmt ::= try_except_ret38 + stmt ::= tryfinally38astmt + stmt ::= tryfinally38rstmt + stmt ::= tryfinally38rstmt2 + stmt ::= tryfinally38rstmt3 + stmt ::= tryfinally38stmt + stmt ::= whileTruestmt38 + stmt ::= whilestmt38 + + call_stmt ::= call + break ::= POP_BLOCK BREAK_LOOP + break ::= POP_BLOCK POP_TOP BREAK_LOOP + break ::= POP_TOP BREAK_LOOP + break ::= POP_EXCEPT BREAK_LOOP + + # The "continue" rule is a weird one. In 3.8, CONTINUE_LOOP was removed. 
+ # Inside an loop we can have this, which can only appear in side a try/except + # And it can also appear at the end of the try except. + continue ::= POP_EXCEPT JUMP_BACK + + + # FIXME: this should be restricted to being inside a try block + stmt ::= except_ret38 + stmt ::= except_ret38a + + # FIXME: this should be added only when seeing GET_AITER or YIELD_FROM + async_for ::= GET_AITER _come_froms + SETUP_FINALLY GET_ANEXT LOAD_CONST YIELD_FROM POP_BLOCK + async_for_stmt38 ::= expr async_for + store for_block + COME_FROM_FINALLY + END_ASYNC_FOR + + genexpr_func_async ::= LOAD_ARG func_async_prefix + store comp_iter + JUMP_BACK COME_FROM_FINALLY + END_ASYNC_FOR + + # FIXME: "come_froms" after the "else_suite" or END_ASYNC_FOR distinguish which of + # for / forelse is used. Add "come_froms" and check of add up control-flow detection phase. + async_forelse_stmt38 ::= expr + GET_AITER + SETUP_FINALLY + GET_ANEXT + LOAD_CONST + YIELD_FROM + POP_BLOCK + store for_block + COME_FROM_FINALLY + END_ASYNC_FOR + else_suite + + # Seems to be used to discard values before a return in a "for" loop + discard_top ::= ROT_TWO POP_TOP + discard_tops ::= discard_top+ + + return ::= return_expr + discard_tops RETURN_VALUE + + return ::= popb_return + return ::= pop_return + return ::= pop_ex_return + except_stmt ::= pop_ex_return + pop_return ::= POP_TOP return_expr RETURN_VALUE + popb_return ::= return_expr POP_BLOCK RETURN_VALUE + pop_ex_return ::= return_expr ROT_FOUR POP_EXCEPT RETURN_VALUE + + # 3.8 can push a looping JUMP_BACK into into a JUMP_ from a statement that jumps to it + lastl_stmt ::= ifpoplaststmtl + ifpoplaststmtl ::= testexpr POP_TOP c_stmts_opt + ifelsestmtl ::= testexpr c_stmts_opt jb_cfs else_suitel JUMP_BACK come_froms + + # Keep indices the same in ifelsestmtl + cf_pt ::= COME_FROM POP_TOP + ifelsestmtl ::= testexpr c_stmts cf_pt else_suite + + for38 ::= expr get_iter store for_block JUMP_BACK + for38 ::= expr get_for_iter store for_block JUMP_BACK + for38 
::= expr get_for_iter store for_block JUMP_BACK POP_BLOCK + for38 ::= expr get_for_iter store for_block + + forelsestmt38 ::= expr get_for_iter store for_block POP_BLOCK else_suite + forelsestmt38 ::= expr get_for_iter store for_block JUMP_BACK _come_froms + else_suite + + forelselaststmt38 ::= expr get_for_iter store for_block POP_BLOCK else_suitec + forelselaststmtl38 ::= expr get_for_iter store for_block POP_BLOCK else_suitel + + returns_in_except ::= _stmts except_return_value + except_return_value ::= POP_BLOCK return + except_return_value ::= expr POP_BLOCK RETURN_VALUE + + whilestmt38 ::= _come_froms testexpr l_stmts_opt COME_FROM JUMP_BACK + POP_BLOCK + whilestmt38 ::= _come_froms testexpr l_stmts_opt JUMP_BACK POP_BLOCK + whilestmt38 ::= _come_froms testexpr l_stmts_opt JUMP_BACK come_froms + whilestmt38 ::= _come_froms testexpr returns POP_BLOCK + whilestmt38 ::= _come_froms testexpr l_stmts JUMP_BACK + whilestmt38 ::= _come_froms testexpr l_stmts come_froms + + # while1elsestmt ::= l_stmts JUMP_BACK + whileTruestmt ::= _come_froms l_stmts JUMP_BACK POP_BLOCK + while1stmt ::= _come_froms l_stmts COME_FROM JUMP_BACK COME_FROM_LOOP + whileTruestmt38 ::= _come_froms l_stmts JUMP_BACK + whileTruestmt38 ::= _come_froms l_stmts JUMP_BACK COME_FROM_EXCEPT_CLAUSE + whileTruestmt38 ::= _come_froms pass JUMP_BACK + + for_block ::= _come_froms l_stmts_opt _come_from_loops JUMP_BACK + + except_cond1 ::= DUP_TOP expr COMPARE_OP jmp_false + POP_TOP POP_TOP POP_TOP + POP_EXCEPT + except_cond_as ::= DUP_TOP expr COMPARE_OP POP_JUMP_IF_FALSE + POP_TOP STORE_FAST POP_TOP + + try_elsestmtl38 ::= SETUP_FINALLY suite_stmts_opt POP_BLOCK + except_handler38 COME_FROM + else_suitel opt_come_from_except + try_except ::= SETUP_FINALLY suite_stmts_opt POP_BLOCK + except_handler38 + + try_except38 ::= SETUP_FINALLY POP_BLOCK POP_TOP suite_stmts_opt + except_handler38a + + # suite_stmts has a return + try_except38 ::= SETUP_FINALLY POP_BLOCK suite_stmts + except_handler38b + 
try_except38r ::= SETUP_FINALLY return_except + except_handler38b + return_except ::= stmts POP_BLOCK return + + + # In 3.8 there seems to be some sort of code fiddle with POP_EXCEPT when there + # is a final return in the "except" block. + # So we treat the "return" separate from the other statements + cond_except_stmt ::= except_cond1 except_stmts + cond_except_stmts_opt ::= cond_except_stmt* + + try_except38r2 ::= SETUP_FINALLY + suite_stmts_opt + POP_BLOCK JUMP_FORWARD + COME_FROM_FINALLY POP_TOP POP_TOP POP_TOP + cond_except_stmts_opt + POP_EXCEPT return + END_FINALLY + COME_FROM + + try_except38r3 ::= SETUP_FINALLY + suite_stmts_opt + POP_BLOCK JUMP_FORWARD + COME_FROM_FINALLY + cond_except_stmts_opt + POP_EXCEPT return + COME_FROM + END_FINALLY + COME_FROM + + + try_except38r4 ::= SETUP_FINALLY + returns_in_except + COME_FROM_FINALLY + except_cond1 + return + COME_FROM + END_FINALLY + + + # suite_stmts has a return + try_except38 ::= SETUP_FINALLY POP_BLOCK suite_stmts + except_handler38b + try_except_as ::= SETUP_FINALLY POP_BLOCK suite_stmts + except_handler_as END_FINALLY COME_FROM + try_except_as ::= SETUP_FINALLY suite_stmts + except_handler_as END_FINALLY COME_FROM + + try_except_ret38 ::= SETUP_FINALLY returns except_ret38a + try_except_ret38a ::= SETUP_FINALLY returns except_handler38c + END_FINALLY come_from_opt + + # Note: there is a suite_stmts_opt which seems + # to be bookkeeping which is not expressed in source code + except_ret38 ::= SETUP_FINALLY expr ROT_FOUR POP_BLOCK POP_EXCEPT + CALL_FINALLY RETURN_VALUE COME_FROM + COME_FROM_FINALLY + suite_stmts_opt END_FINALLY + except_ret38a ::= COME_FROM_FINALLY POP_TOP POP_TOP POP_TOP + expr ROT_FOUR + POP_EXCEPT RETURN_VALUE END_FINALLY + + except_handler38 ::= _jump COME_FROM_FINALLY + except_stmts END_FINALLY opt_come_from_except + except_handler38a ::= COME_FROM_FINALLY POP_TOP POP_TOP POP_TOP + POP_EXCEPT POP_TOP stmts END_FINALLY + + except_handler38c ::= COME_FROM_FINALLY except_cond1a 
except_stmts + POP_EXCEPT JUMP_FORWARD COME_FROM + except_handler_as ::= COME_FROM_FINALLY except_cond_as tryfinallystmt + POP_EXCEPT JUMP_FORWARD COME_FROM + + tryfinallystmt ::= SETUP_FINALLY suite_stmts_opt POP_BLOCK + BEGIN_FINALLY COME_FROM_FINALLY suite_stmts_opt + END_FINALLY + + + lc_setup_finally ::= LOAD_CONST SETUP_FINALLY + call_finally_pt ::= CALL_FINALLY POP_TOP + cf_cf_finally ::= come_from_opt COME_FROM_FINALLY + pop_finally_pt ::= POP_FINALLY POP_TOP + ss_end_finally ::= suite_stmts END_FINALLY + sf_pb_call_returns ::= SETUP_FINALLY POP_BLOCK CALL_FINALLY returns + + + # FIXME: DRY rules below + tryfinally38rstmt ::= sf_pb_call_returns + cf_cf_finally + ss_end_finally + tryfinally38rstmt ::= sf_pb_call_returns + cf_cf_finally END_FINALLY + suite_stmts + tryfinally38rstmt ::= sf_pb_call_returns + cf_cf_finally POP_FINALLY + ss_end_finally + tryfinally38rstmt ::= sf_bp_call_returns + COME_FROM_FINALLY POP_FINALLY + ss_end_finally + + tryfinally38rstmt2 ::= lc_setup_finally POP_BLOCK call_finally_pt + returns + cf_cf_finally pop_finally_pt + ss_end_finally POP_TOP + tryfinally38rstmt3 ::= SETUP_FINALLY expr POP_BLOCK CALL_FINALLY RETURN_VALUE + COME_FROM COME_FROM_FINALLY + ss_end_finally + + tryfinally38stmt ::= SETUP_FINALLY suite_stmts_opt POP_BLOCK + BEGIN_FINALLY COME_FROM_FINALLY + POP_FINALLY suite_stmts_opt END_FINALLY + + tryfinally38astmt ::= LOAD_CONST SETUP_FINALLY suite_stmts_opt POP_BLOCK + BEGIN_FINALLY COME_FROM_FINALLY + POP_FINALLY POP_TOP suite_stmts_opt END_FINALLY POP_TOP + """ + def p_38walrus(self, args): """ # named_expr is also known as the "walrus op" := @@ -29,212 +292,6 @@ def p_38walrus(self, args): named_expr ::= expr DUP_TOP store """ - def p_38misc(self, args): - """ - stmt ::= async_for_stmt38 - stmt ::= async_forelse_stmt38 - stmt ::= for38 - stmt ::= forelsestmt38 - stmt ::= forelselaststmt38 - stmt ::= forelselaststmtl38 - stmt ::= tryfinally38stmt - stmt ::= tryfinally38rstmt - stmt ::= tryfinally38rstmt2 - stmt 
::= tryfinally38rstmt3 - stmt ::= tryfinally38astmt - stmt ::= try_elsestmtl38 - stmt ::= try_except_ret38 - stmt ::= try_except38 - stmt ::= try_except_as - stmt ::= whilestmt38 - stmt ::= whileTruestmt38 - stmt ::= call_stmt - stmt ::= continue - - call_stmt ::= call - break ::= POP_BLOCK BREAK_LOOP - break ::= POP_BLOCK POP_TOP BREAK_LOOP - break ::= POP_TOP BREAK_LOOP - break ::= POP_EXCEPT BREAK_LOOP - - # The "continue" rule is a weird one. In 3.8, CONTINUE_LOOP was removed. - # Inside an loop we can have this, which can only appear in side a try/except - # And it can also appear at the end of the try except. - continue ::= POP_EXCEPT JUMP_BACK - - - # FIXME: this should be restricted to being inside a try block - stmt ::= except_ret38 - stmt ::= except_ret38a - - # FIXME: this should be added only when seeing GET_AITER or YIELD_FROM - async_for ::= GET_AITER _come_froms - SETUP_FINALLY GET_ANEXT LOAD_CONST YIELD_FROM POP_BLOCK - async_for_stmt38 ::= expr async_for - store for_block - COME_FROM_FINALLY - END_ASYNC_FOR - - genexpr_func_async ::= LOAD_FAST func_async_prefix - store comp_iter - JUMP_BACK COME_FROM_FINALLY - END_ASYNC_FOR - - # FIXME: come froms after the else_suite or END_ASYNC_FOR distinguish which of - # for / forelse is used. Add come froms and check of add up control-flow detection phase. 
- async_forelse_stmt38 ::= expr - GET_AITER - SETUP_FINALLY - GET_ANEXT - LOAD_CONST - YIELD_FROM - POP_BLOCK - store for_block - COME_FROM_FINALLY - END_ASYNC_FOR - else_suite - - # Seems to be used to discard values before a return in a "for" loop - discard_top ::= ROT_TWO POP_TOP - discard_tops ::= discard_top+ - - return ::= return_expr - discard_tops RETURN_VALUE - - return ::= popb_return - return ::= pop_return - return ::= pop_ex_return - except_stmt ::= pop_ex_return - pop_return ::= POP_TOP return_expr RETURN_VALUE - popb_return ::= return_expr POP_BLOCK RETURN_VALUE - pop_ex_return ::= return_expr ROT_FOUR POP_EXCEPT RETURN_VALUE - - # 3.8 can push a looping JUMP_BACK into into a JUMP_ from a statement that jumps to it - lastl_stmt ::= ifpoplaststmtl - ifpoplaststmtl ::= testexpr POP_TOP c_stmts_opt - ifelsestmtl ::= testexpr c_stmts_opt jb_cfs else_suitel JUMP_BACK come_froms - - # Keep indices the same in ifelsestmtl - cf_pt ::= COME_FROM POP_TOP - ifelsestmtl ::= testexpr c_stmts cf_pt else_suite - - for38 ::= expr get_iter store for_block JUMP_BACK - for38 ::= expr get_for_iter store for_block JUMP_BACK - for38 ::= expr get_for_iter store for_block JUMP_BACK POP_BLOCK - for38 ::= expr get_for_iter store for_block - - forelsestmt38 ::= expr get_for_iter store for_block POP_BLOCK else_suite - forelsestmt38 ::= expr get_for_iter store for_block JUMP_BACK _come_froms else_suite - - forelselaststmt38 ::= expr get_for_iter store for_block POP_BLOCK else_suitec - forelselaststmtl38 ::= expr get_for_iter store for_block POP_BLOCK else_suitel - - whilestmt38 ::= _come_froms testexpr l_stmts_opt COME_FROM JUMP_BACK POP_BLOCK - whilestmt38 ::= _come_froms testexpr l_stmts_opt JUMP_BACK POP_BLOCK - whilestmt38 ::= _come_froms testexpr l_stmts_opt JUMP_BACK come_froms - whilestmt38 ::= _come_froms testexpr returns POP_BLOCK - whilestmt38 ::= _come_froms testexpr l_stmts JUMP_BACK - whilestmt38 ::= _come_froms testexpr l_stmts come_froms - - # while1elsestmt ::= 
l_stmts JUMP_BACK - whileTruestmt ::= _come_froms l_stmts JUMP_BACK POP_BLOCK - while1stmt ::= _come_froms l_stmts COME_FROM_LOOP - while1stmt ::= _come_froms l_stmts COME_FROM JUMP_BACK COME_FROM_LOOP - whileTruestmt38 ::= _come_froms l_stmts JUMP_BACK - whileTruestmt38 ::= _come_froms l_stmts JUMP_BACK COME_FROM_EXCEPT_CLAUSE - - for_block ::= _come_froms l_stmts_opt _come_from_loops JUMP_BACK - - except_cond1 ::= DUP_TOP expr COMPARE_OP jmp_false - POP_TOP POP_TOP POP_TOP - POP_EXCEPT - except_cond_as ::= DUP_TOP expr COMPARE_OP POP_JUMP_IF_FALSE - POP_TOP STORE_FAST POP_TOP - - try_elsestmtl38 ::= SETUP_FINALLY suite_stmts_opt POP_BLOCK - except_handler38 COME_FROM - else_suitel opt_come_from_except - try_except ::= SETUP_FINALLY suite_stmts_opt POP_BLOCK - except_handler38 - try_except38 ::= SETUP_FINALLY POP_BLOCK POP_TOP suite_stmts_opt - except_handler38a - - # suite_stmts has a return - try_except38 ::= SETUP_FINALLY POP_BLOCK suite_stmts - except_handler38b - try_except_as ::= SETUP_FINALLY POP_BLOCK suite_stmts - except_handler_as END_FINALLY COME_FROM - try_except_as ::= SETUP_FINALLY suite_stmts - except_handler_as END_FINALLY COME_FROM - - try_except_ret38 ::= SETUP_FINALLY returns except_ret38a - try_except_ret38a ::= SETUP_FINALLY returns except_handler38c - END_FINALLY come_from_opt - - # Note: there is a suite_stmts_opt which seems - # to be bookkeeping which is not expressed in source code - except_ret38 ::= SETUP_FINALLY expr ROT_FOUR POP_BLOCK POP_EXCEPT - CALL_FINALLY RETURN_VALUE COME_FROM - COME_FROM_FINALLY - suite_stmts_opt END_FINALLY - except_ret38a ::= COME_FROM_FINALLY POP_TOP POP_TOP POP_TOP - expr ROT_FOUR - POP_EXCEPT RETURN_VALUE END_FINALLY - - except_handler38 ::= _jump COME_FROM_FINALLY - except_stmts END_FINALLY opt_come_from_except - except_handler38a ::= COME_FROM_FINALLY POP_TOP POP_TOP POP_TOP - POP_EXCEPT POP_TOP stmts END_FINALLY - - except_handler38c ::= COME_FROM_FINALLY except_cond1a except_stmts - POP_EXCEPT 
JUMP_FORWARD COME_FROM - except_handler_as ::= COME_FROM_FINALLY except_cond_as tryfinallystmt - POP_EXCEPT JUMP_FORWARD COME_FROM - - tryfinallystmt ::= SETUP_FINALLY suite_stmts_opt POP_BLOCK - BEGIN_FINALLY COME_FROM_FINALLY suite_stmts_opt - END_FINALLY - - - lc_setup_finally ::= LOAD_CONST SETUP_FINALLY - call_finally_pt ::= CALL_FINALLY POP_TOP - cf_cf_finally ::= come_from_opt COME_FROM_FINALLY - pop_finally_pt ::= POP_FINALLY POP_TOP - ss_end_finally ::= suite_stmts END_FINALLY - sf_pb_call_returns ::= SETUP_FINALLY POP_BLOCK CALL_FINALLY returns - - - # FIXME: DRY rules below - tryfinally38rstmt ::= sf_pb_call_returns - cf_cf_finally - ss_end_finally - tryfinally38rstmt ::= sf_pb_call_returns - cf_cf_finally END_FINALLY - suite_stmts - tryfinally38rstmt ::= sf_pb_call_returns - cf_cf_finally POP_FINALLY - ss_end_finally - tryfinally38rstmt ::= sf_bp_call_returns - COME_FROM_FINALLY POP_FINALLY - ss_end_finally - - tryfinally38rstmt2 ::= lc_setup_finally POP_BLOCK call_finally_pt - returns - cf_cf_finally pop_finally_pt - ss_end_finally POP_TOP - tryfinally38rstmt3 ::= SETUP_FINALLY expr POP_BLOCK CALL_FINALLY RETURN_VALUE - COME_FROM COME_FROM_FINALLY - ss_end_finally - - tryfinally38stmt ::= SETUP_FINALLY suite_stmts_opt POP_BLOCK - BEGIN_FINALLY COME_FROM_FINALLY - POP_FINALLY suite_stmts_opt END_FINALLY - - tryfinally38astmt ::= LOAD_CONST SETUP_FINALLY suite_stmts_opt POP_BLOCK - BEGIN_FINALLY COME_FROM_FINALLY - POP_FINALLY POP_TOP suite_stmts_opt END_FINALLY POP_TOP - """ - def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG): super(Python38Parser, self).__init__(debug_parser) self.customized = {} @@ -308,17 +365,241 @@ def remove_rules_38(self): """ ) - def customize_grammar_rules(self, tokens, customize): - super(Python37Parser, self).customize_grammar_rules(tokens, customize) + def customize_reduce_checks_full38(self, tokens, customize): + """ + Extra tests when a reduction is made in the full grammar. 
+ + Reductions here are extended from those used in the lambda grammar + """ self.remove_rules_38() + self.check_reduce["pop_return"] = "tokens" self.check_reduce["whileTruestmt38"] = "tokens" self.check_reduce["whilestmt38"] = "tokens" self.check_reduce["try_elsestmtl38"] = "AST" + self.reduce_check_table["pop_return"] = pop_return_check + + def customize_grammar_rules(self, tokens, customize): + super(Python37Parser, self).customize_grammar_rules(tokens, customize) + self.customize_reduce_checks_full38(tokens, customize) + + # For a rough break out on the first word. This may + # include instructions that don't need customization, + # but we'll do a finer check after the rough breakout. + customize_instruction_basenames = frozenset( + ( + "BEFORE", + "BUILD", + "CALL", + "DICT", + "GET", + "FORMAT", + "LIST", + "LOAD", + "MAKE", + "SETUP", + "UNPACK", + ) + ) + + # Opcode names in the custom_ops_processed set have rules that get added + # unconditionally and the rules are constant. So they need to be done + # only once and if we see the opcode a second we don't have to consider + # adding more rules. + # + custom_ops_processed = frozenset() + + # A set of instruction operation names that exist in the token stream. + # We use this customize the grammar that we create. + # 2.6-compatible set comprehensions + self.seen_ops = frozenset([t.kind for t in tokens]) + self.seen_op_basenames = frozenset( + [opname[: opname.rfind("_")] for opname in self.seen_ops] + ) + + custom_ops_processed = {"DICT_MERGE"} + + # Loop over instructions adding custom grammar rules based on + # a specific instruction seen. + + if "PyPy" in customize: + self.addRule( + """ + stmt ::= assign3_pypy + stmt ::= assign2_pypy + assign3_pypy ::= expr expr expr store store store + assign2_pypy ::= expr expr store store + """, + nop_func, + ) + + n = len(tokens) + # Determine if we have an iteration CALL_FUNCTION_1. 
+ has_get_iter_call_function1 = False + for i, token in enumerate(tokens): + if token == "GET_ITER" and i < n - 2 and tokens[i + 1] == "CALL_FUNCTION_1": + has_get_iter_call_function1 = True + + for i, token in enumerate(tokens): + opname = token.kind + + # Do a quick breakout before testing potentially + # each of the dozen or so instruction in if elif. + if ( + opname[: opname.find("_")] not in customize_instruction_basenames + or opname in custom_ops_processed + ): + continue + + opname_base = opname[: opname.rfind("_")] + + # Do a quick breakout before testing potentially + # each of the dozen or so instruction in if elif. + if ( + opname[: opname.find("_")] not in customize_instruction_basenames + or opname in custom_ops_processed + ): + continue + if opname_base in ( + "BUILD_LIST", + "BUILD_SET", + "BUILD_SET_UNPACK", + "BUILD_TUPLE", + "BUILD_TUPLE_UNPACK", + ): + v = token.attr + + is_LOAD_CLOSURE = False + if opname_base == "BUILD_TUPLE": + # If is part of a "load_closure", then it is not part of a + # "list". + is_LOAD_CLOSURE = True + for j in range(v): + if tokens[i - j - 1].kind != "LOAD_CLOSURE": + is_LOAD_CLOSURE = False + break + if is_LOAD_CLOSURE: + rule = "load_closure ::= %s%s" % (("LOAD_CLOSURE " * v), opname) + self.add_unique_rule(rule, opname, token.attr, customize) + + elif opname_base == "BUILD_LIST": + v = token.attr + if v == 0: + rule_str = """ + list ::= BUILD_LIST_0 + list_unpack ::= BUILD_LIST_0 expr LIST_EXTEND + list ::= list_unpack + """ + self.add_unique_doc_rules(rule_str, customize) + + elif opname == "BUILD_TUPLE_UNPACK_WITH_CALL": + # FIXME: should this be parameterized by EX value? + self.addRule( + """expr ::= call_ex_kw3 + call_ex_kw3 ::= expr + build_tuple_unpack_with_call + expr + CALL_FUNCTION_EX_KW + """, + nop_func, + ) + + if not is_LOAD_CLOSURE or v == 0: + # We do this complicated test to speed up parsing of + # pathelogically long literals, especially those over 1024. 
+ build_count = token.attr + thousands = build_count // 1024 + thirty32s = (build_count // 32) % 32 + if thirty32s > 0: + rule = "expr32 ::=%s" % (" expr" * 32) + self.add_unique_rule(rule, opname_base, build_count, customize) + pass + if thousands > 0: + self.add_unique_rule( + "expr1024 ::=%s" % (" expr32" * 32), + opname_base, + build_count, + customize, + ) + pass + collection = opname_base[opname_base.find("_") + 1 :].lower() + rule = ( + ("%s ::= " % collection) + + "expr1024 " * thousands + + "expr32 " * thirty32s + + "expr " * (build_count % 32) + + opname + ) + self.add_unique_rules(["expr ::= %s" % collection, rule], customize) + continue + continue + + elif opname == "BUILD_STRING_2": + self.addRule( + """ + expr ::= formatted_value_debug + formatted_value_debug ::= LOAD_STR formatted_value2 BUILD_STRING_2 + formatted_value_debug ::= LOAD_STR formatted_value1 BUILD_STRING_2 + """, + nop_func, + ) + custom_ops_processed.add(opname) + + elif opname == "BUILD_STRING_3": + self.addRule( + """ + expr ::= formatted_value_debug + formatted_value_debug ::= LOAD_STR formatted_value2 LOAD_STR BUILD_STRING_3 + formatted_value_debug ::= LOAD_STR formatted_value1 LOAD_STR BUILD_STRING_3 + """, + nop_func, + ) + custom_ops_processed.add(opname) + + elif opname == "LOAD_CLOSURE": + self.addRule("""load_closure ::= LOAD_CLOSURE+""", nop_func) + + elif opname == "LOOKUP_METHOD": + # A PyPy speciality - DRY with parse3 + self.addRule( + """ + expr ::= attribute + attribute ::= expr LOOKUP_METHOD + """, + nop_func, + ) + custom_ops_processed.add(opname) + + elif opname == "MAKE_FUNCTION_8": + if "LOAD_DICTCOMP" in self.seen_ops: + # Is there something general going on here? 
+ rule = """ + dict_comp ::= load_closure LOAD_DICTCOMP LOAD_STR + MAKE_FUNCTION_8 expr + GET_ITER CALL_FUNCTION_1 + """ + self.addRule(rule, nop_func) + elif "LOAD_SETCOMP" in self.seen_ops: + rule = """ + set_comp ::= load_closure LOAD_SETCOMP LOAD_STR + MAKE_FUNCTION_CLOSURE expr + GET_ITER CALL_FUNCTION_1 + """ + self.addRule(rule, nop_func) + elif opname == "SETUP_WITH": + rules_str = """ + stmt ::= with_as_pass + with_as_pass ::= expr + SETUP_WITH store pass + POP_BLOCK BEGIN_FINALLY COME_FROM_WITH + with_suffix + """ + self.addRule(rules_str, nop_func) + def reduce_is_invalid(self, rule, ast, tokens, first, last): - invalid = super(Python38Parser, - self).reduce_is_invalid(rule, ast, - tokens, first, last) + invalid = super(Python38Parser, self).reduce_is_invalid( + rule, ast, tokens, first, last + ) self.remove_rules_38() if invalid: return invalid @@ -346,13 +627,13 @@ class Python38ParserSingle(Python38Parser, PythonParserSingle): p = Python38Parser() p.remove_rules_38() p.check_grammar() - from uncompyle6 import PYTHON_VERSION, IS_PYPY + from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE - if PYTHON_VERSION == 3.8: + if PYTHON_VERSION_TRIPLE[:2] == (3, 8): lhs, rhs, tokens, right_recursive, dup_rhs = p.check_sets() from uncompyle6.scanner import get_scanner - s = get_scanner(PYTHON_VERSION, IS_PYPY) + s = get_scanner(PYTHON_VERSION_TRIPLE, IS_PYPY) opcode_set = set(s.opc.opname).union( set( """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM @@ -369,7 +650,9 @@ class Python38ParserSingle(Python38Parser, PythonParserSingle): remain_tokens = set(remain_tokens) - opcode_set print(remain_tokens) import sys + if len(sys.argv) > 1: from spark_parser.spark import rule2str + for rule in sorted(p.rule2name.items()): print(rule2str(rule[0])) diff --git a/uncompyle6/parsers/reducecheck/__init__.py b/uncompyle6/parsers/reducecheck/__init__.py index a2bf8bb46..4f99ab2fd 100644 --- a/uncompyle6/parsers/reducecheck/__init__.py +++ 
b/uncompyle6/parsers/reducecheck/__init__.py @@ -1,15 +1,18 @@ -from uncompyle6.parsers.reducecheck.and_check import * -from uncompyle6.parsers.reducecheck.aug_assign import * -from uncompyle6.parsers.reducecheck.except_handler import * -from uncompyle6.parsers.reducecheck.except_handler_else import * -from uncompyle6.parsers.reducecheck.ifelsestmt import * -from uncompyle6.parsers.reducecheck.iflaststmt import * -from uncompyle6.parsers.reducecheck.ifstmt import * -from uncompyle6.parsers.reducecheck.ifstmts_jump import * -from uncompyle6.parsers.reducecheck.or_check import * -from uncompyle6.parsers.reducecheck.testtrue import * -from uncompyle6.parsers.reducecheck.tryelsestmt import * -from uncompyle6.parsers.reducecheck.tryexcept import * -from uncompyle6.parsers.reducecheck.tryelsestmtl3 import * -from uncompyle6.parsers.reducecheck.while1elsestmt import * -from uncompyle6.parsers.reducecheck.while1stmt import * +from uncompyle6.parsers.reducecheck.and_check import * # noqa +from uncompyle6.parsers.reducecheck.aug_assign import * # noqa +from uncompyle6.parsers.reducecheck.except_handler import * # noqa +from uncompyle6.parsers.reducecheck.except_handler_else import * # noqa +from uncompyle6.parsers.reducecheck.ifelsestmt import * # noqa +from uncompyle6.parsers.reducecheck.ifelsestmt2 import * # noqa +from uncompyle6.parsers.reducecheck.iflaststmt import * # noqa +from uncompyle6.parsers.reducecheck.ifstmt import * # noqa +from uncompyle6.parsers.reducecheck.ifstmt2 import * # noqa +from uncompyle6.parsers.reducecheck.ifstmts_jump import * # noqa +from uncompyle6.parsers.reducecheck.for_block_check import * # noqa +from uncompyle6.parsers.reducecheck.or_check import * # noqa +from uncompyle6.parsers.reducecheck.testtrue import * # noqa +from uncompyle6.parsers.reducecheck.tryelsestmt import * # noqa +from uncompyle6.parsers.reducecheck.tryexcept import * # noqa +from uncompyle6.parsers.reducecheck.tryelsestmtl3 import * # noqa +from 
uncompyle6.parsers.reducecheck.while1elsestmt import * # noqa +from uncompyle6.parsers.reducecheck.while1stmt import * # noqa diff --git a/uncompyle6/parsers/reducecheck/and_check.py b/uncompyle6/parsers/reducecheck/and_check.py index 5e5908217..afffbcb33 100644 --- a/uncompyle6/parsers/reducecheck/and_check.py +++ b/uncompyle6/parsers/reducecheck/and_check.py @@ -1,7 +1,13 @@ -# Copyright (c) 2020 Rocky Bernstein +# Copyright (c) 2020, 2022 Rocky Bernstein +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or -def and_check(self, lhs, n, rule, ast, tokens, first, last): + +def and_invalid( + self, lhs: str, n: int, rule, ast, tokens: list, first: int, last: int + ) -> bool: jmp = ast[1] if jmp.kind.startswith("jmp_"): if last == n: diff --git a/uncompyle6/parsers/reducecheck/and_not_check.py b/uncompyle6/parsers/reducecheck/and_not_check.py index a246e6302..10c53ddcc 100644 --- a/uncompyle6/parsers/reducecheck/and_not_check.py +++ b/uncompyle6/parsers/reducecheck/and_not_check.py @@ -1,9 +1,7 @@ -# Copyright (c) 2020 Rocky Bernstein +# Copyright (c) 2020, 2025 Rocky Bernstein -def and_not_check( - self, lhs, n, rule, ast, tokens, first, last -) -> bool: +def and_not_check(self, lhs, n, rule, ast, tokens: list, first: int, last: int) -> bool: jmp = ast[1] if jmp.kind.startswith("jmp_"): if last == n: diff --git a/uncompyle6/parsers/reducecheck/except_handler.py b/uncompyle6/parsers/reducecheck/except_handler.py index 8d2341d9b..db1a59543 100644 --- a/uncompyle6/parsers/reducecheck/except_handler.py +++ b/uncompyle6/parsers/reducecheck/except_handler.py @@ -1,7 +1,8 @@ -# Copyright (c) 2020 Rocky Bernstein +# Copyright (c) 2020, 2025 Rocky Bernstein -def except_handler(self, lhs, n, rule, ast, tokens, first, last): - end_token = tokens[last-1] + +def except_handler(self, lhs, n: int, rule, ast, tokens: list, 
first: int, last: int): + end_token = tokens[last - 1] # print("XXX", first, last) # for t in range(first, last): @@ -13,7 +14,7 @@ def except_handler(self, lhs, n, rule, ast, tokens, first, last): if self.version[:2] == (1, 4): return False - # Make sure come froms all come from within "except_handler". - if end_token != "COME_FROM": + # Make sure COME_FROMs froms come from within "except_handler". + if end_token.kind != "COME_FROM": return False - return end_token.attr < tokens[first].offset + return end_token.attr is not None and end_token.attr < tokens[first].offset diff --git a/uncompyle6/parsers/reducecheck/for_block_check.py b/uncompyle6/parsers/reducecheck/for_block_check.py new file mode 100644 index 000000000..28d67dddd --- /dev/null +++ b/uncompyle6/parsers/reducecheck/for_block_check.py @@ -0,0 +1,72 @@ +# Copyright (c) 2022 Rocky Bernstein + +from uncompyle6.scanners.tok import Token + + +def for_block_invalid(self, lhs, n, rule, tree, tokens, first: int, last: int) -> bool: + # print("XXX", first, last) + # for t in range(first, last): + # print(tokens[t]) + # print("=" * 30) + + if rule == ( + "for_block", + ("l_stmts_opt", "JUMP_ABSOLUTE", "JUMP_BACK", "JUMP_BACK"), + ): + # Check that the two JUMP_BACK's go to the same place. + jump_back1 = tokens[last - 2] + jump_back2 = tokens[last - 1] + if jump_back1.attr != jump_back2.attr: + return True + + # Also check that JUMP_ABSOLUTE jumps to the JUMP_BACK. + # In this situation the JUMP_ABSOLUTE and a JUMP_BACK + # is not needed, but it seems to be there anyway. + + jump_absolute = tokens[last - 3] + if jump_absolute.attr != jump_back2.offset: + return True + + # Right now all of this is known to happen only in Python 2.7. 
+ if self.version[:2] == (2, 7): + return False + + if len(rule[1]) <= 1 or not tree: + return False + + come_froms = tree[-1] + # This is complicated, but note that the JUMP_IF instruction comes immediately + # *before* _ifstmts_jump so that's what we have to test + # the COME_FROM against. This can be complicated by intervening + # POP_TOP, and pseudo COME_FROM, ELSE instructions + # + pop_jump_index = first - 1 + while pop_jump_index > 0 and tokens[pop_jump_index] in ( + "ELSE", + "POP_TOP", + "JUMP_FORWARD", + "COME_FROM", + ): + pop_jump_index -= 1 + + # FIXME: something is fishy when and EXTENDED ARG is needed before the + # pop_jump_index instruction to get the argument. In this case, the + # _ifsmtst_jump can jump to a spot beyond the ``come_froms``. + # That is going on in the non-EXTENDED_ARG case is that the POP_JUMP_IF + # jumps to a JUMP_(FORWARD) which is changed into an EXTENDED_ARG POP_JUMP_IF + # to the jumped forwarded address + if tokens[pop_jump_index].attr > 256: + return False + + pop_jump_offset = tokens[pop_jump_index].off2int(prefer_last=False) + if isinstance(come_froms, Token): + if tokens[pop_jump_index].attr < pop_jump_offset and tree[0] != "pass": + # This is a jump backwards to a loop. All bets are off here when there the + # unless statement is "pass" which has no instructions associated with it. 
+ return False + return come_froms.attr is not None and pop_jump_offset > come_froms.attr + + elif len(come_froms) == 0: + return False + else: + return pop_jump_offset > come_froms[-1].attr diff --git a/uncompyle6/parsers/reducecheck/ifelsestmt.py b/uncompyle6/parsers/reducecheck/ifelsestmt.py index d0f390da9..e5f295a72 100644 --- a/uncompyle6/parsers/reducecheck/ifelsestmt.py +++ b/uncompyle6/parsers/reducecheck/ifelsestmt.py @@ -1,10 +1,10 @@ -# Copyright (c) 2020-2021 Rocky Bernstein +# Copyright (c) 2020-2022 Rocky Bernstein from uncompyle6.scanners.tok import Token IFELSE_STMT_RULES = frozenset( [ - ( + ( "ifelsestmt", ( "testexpr", @@ -52,6 +52,15 @@ "else_suitec", ), ), + ( + "ifelsestmtc", + ( + "testexpr", + "c_stmts_opt", + "JUMP_ABSOLUTE", + "else_suitec", + ), + ), ( "ifelsestmt", ( @@ -72,9 +81,19 @@ "come_froms", ), ), + ( + "ifelsestmtc", + ("testexpr", "c_stmts_opt", "JUMP_FORWARD", "else_suite", "come_froms"), + ), ( "ifelsestmt", - ("testexpr", "c_stmts", "come_froms", "else_suite", "come_froms",), + ( + "testexpr", + "c_stmts", + "come_froms", + "else_suite", + "come_froms", + ), ), ( "ifelsestmt", @@ -112,7 +131,8 @@ "stmts", "jf_cfs", "\\e_else_suite_opt", - "\\e_opt_come_from_except") + "\\e_opt_come_from_except", + ), ), ( "ifelsestmt", @@ -121,12 +141,14 @@ "stmts", "jf_cfs", "\\e_else_suite_opt", - "opt_come_from_except") + "opt_come_from_except", + ), ), - ]) + ] +) -def ifelsestmt(self, lhs, n, rule, ast, tokens, first, last): +def ifelsestmt(self, lhs, n, rule, tree, tokens, first, last): if (last + 1) < n and tokens[last + 1] == "COME_FROM_LOOP" and lhs != "ifelsestmtc": # ifelsestmt jumped outside of loop. No good. return True @@ -136,55 +158,58 @@ def ifelsestmt(self, lhs, n, rule, ast, tokens, first, last): # print(tokens[t]) # print("=" * 30) + first_offset = tokens[first].off2int() + if rule not in IFELSE_STMT_RULES: # print("XXX", rule) return False # Avoid if/else where the "then" is a "raise_stmt1" for an # assert statement. 
Parse this as an "assert" instead. - stmts = ast[1] + stmts = tree[1] if stmts in ("c_stmts",) and len(stmts) == 1: raise_stmt1 = stmts[0] - if ( - raise_stmt1 == "raise_stmt1" and - raise_stmt1[0] in ("LOAD_ASSERT",) - ): + if raise_stmt1 == "raise_stmt1" and raise_stmt1[0] in ("LOAD_ASSERT",): return True - # Make sure all of the "come froms" offset at the + # Make sure all the offsets from the "COME_FROMs" at the # end of the "if" come from somewhere inside the "if". # Since the come_froms are ordered so that lowest # offset COME_FROM is last, it is sufficient to test # just the last one. - if len(ast) == 5: - end_come_froms = ast[-1] + if len(tree) == 5: + end_come_froms = tree[-1] if end_come_froms.kind != "else_suite" and self.version >= (3, 0): if end_come_froms == "opt_come_from_except" and len(end_come_froms) > 0: end_come_froms = end_come_froms[0] if not isinstance(end_come_froms, Token): if len(end_come_froms): - return tokens[first].offset > end_come_froms[-1].attr - elif tokens[first].offset > end_come_froms.attr: + return first_offset > end_come_froms[-1].attr + elif first_offset > end_come_froms.attr: return True # FIXME: There is weirdness in the grammar we need to work around. # we need to clean up the grammar. if self.version < (3, 0): - last_token = ast[-1] + last_token = tree[-1] else: last_token = tokens[last] - if last_token == "COME_FROM" and tokens[first].offset > last_token.attr: - if self.version < (3, 0) and self.insts[self.offset2inst_index[last_token.attr]].opname != "SETUP_LOOP": + if last_token == "COME_FROM" and first_offset > last_token.attr: + if ( + self.version < (3, 0) + and self.insts[self.offset2inst_index[last_token.attr]].opname + != "SETUP_LOOP" + ): return True - testexpr = ast[0] + testexpr = tree[0] # Check that the condition portion of the "if" # jumps to the "else" part. 
if testexpr[0] in ("testtrue", "testfalse"): if_condition = testexpr[0] - else_suite = ast[3] + else_suite = tree[3] assert else_suite.kind.startswith("else_suite") if len(if_condition) > 1 and if_condition[1].kind.startswith("jmp_"): @@ -196,7 +221,6 @@ def ifelsestmt(self, lhs, n, rule, ast, tokens, first, last): else: jmp_target = int(jmp[0].pattr) - # Below we check that jmp_target is jumping to a feasible # location. It should be to the transition after the "then" # block and to the beginning of the "else" block. @@ -205,7 +229,7 @@ def ifelsestmt(self, lhs, n, rule, ast, tokens, first, last): # FIXME: the below logic for jf_cfs could probably be # simplified. - jump_else_end = ast[2] + jump_else_end = tree[2] if jump_else_end == "jf_cf_pop": jump_else_end = jump_else_end[0] @@ -237,7 +261,34 @@ def ifelsestmt(self, lhs, n, rule, ast, tokens, first, last): if jump_else_end[-1].off2int() != jmp_target: return True - if tokens[first].off2int() > jmp_target: + if first_offset > jmp_target: + # A backward or loop jump from the end of an "else" + # clause before the beginning of the "if" test is okay + # only if we are trying to match or reduce an "if" + # statement of the kind that can occur only inside a + # loop construct. + + if lhs in ("ifelsestmtl", "ifelsestmtc"): + jump_false = jmp + if ( + tree[2].kind in ("JUMP_FORWARD", "JUMP_ABSOLUTE") + and jump_false == "jmp_false" + and len(else_suite) == 1 + ): + suite_stmts = else_suite[0] + continue_stmt = suite_stmts[0] + if ( + suite_stmts in ("suite_stmts", "c_stmts") + and len(suite_stmts) == 1 + and continue_stmt == "continue" + and jump_false[0].attr == continue_stmt[0].attr + ): + # for ...: + # if ...: + # ... 
+ # else: + # continue + return False return True return (jmp_target > last_offset) and tokens[last] != "JUMP_FORWARD" diff --git a/uncompyle6/parsers/reducecheck/ifelsestmt2.py b/uncompyle6/parsers/reducecheck/ifelsestmt2.py new file mode 100644 index 000000000..750c522a6 --- /dev/null +++ b/uncompyle6/parsers/reducecheck/ifelsestmt2.py @@ -0,0 +1,149 @@ +# Copyright (c) 2020-2022 Rocky Bernstein +""" +If/else statement reduction check for Python 2.6 (and older?) +""" + +IFELSE_STMT_RULES = frozenset( + [ + ( + "ifelsestmt", + ( + "testexpr_then", + "pass", + "filler", + "else_suitel", + "COME_FROM", + "POP_TOP", + ), + ), + ( + "ifelsestmt", + ( + "testexpr_then", + "c_stmts_opt", + "\\e_filler", + "else_suitel", + "come_froms", + "POP_TOP", + ), + ), + ( + "ifelsestmt", + ( + "testexpr_then", + "\\e_c_stmts_opt", + "\\e_filler", + "else_suitel", + "come_froms", + "POP_TOP", + ), + ), + # We may do something like add these in the future: + ] +) + + +def ifelsestmt2(self, lhs, n, rule, tree, tokens, first, last): + if (last + 1) < n and tokens[last + 1] == "COME_FROM_LOOP" and lhs != "ifelsestmtc": + # ifelsestmt jumped outside of loop. No good. + return True + + # print("XXX", first, last) + # for t in range(first, last): + # print(tokens[t]) + # print("=" * 30) + + if rule not in IFELSE_STMT_RULES: + # print("XXX", rule) + return False + + # Avoid if/else where the "then" is a "raise_stmt1" for an + # assert statement. Parse this as an "assert" instead. + stmts = tree[1] + if stmts in ("c_stmts",) and len(stmts) == 1: + raise_stmt1 = stmts[0] + if raise_stmt1 == "raise_stmt1" and raise_stmt1[0] in ("LOAD_ASSERT",): + return True + + # Make sure all of the "come_froms" offset at the + # end of the "if" come from somewhere inside the "if". + # Since the come_froms are ordered so that lowest + # offset COME_FROM is last, it is sufficient to test + # just the last one. 
+ if len(tree) == 6 and tree[-1] == "POP_TOP": + # FIXME: There is weirdness in the grammar we need to work around. + # we need to clean up the grammar. + last_token = tree[-2] + if last_token == "COME_FROM" and tokens[first].offset > last_token.attr: + if ( + self.insts[self.offset2inst_index[last_token.attr]].opname + != "SETUP_LOOP" + ): + return True + + testexpr = tree[0] + + # Check that the condition portion of the "if" + # jumps to the "else" part. + if testexpr[0] in ("testtrue", "testfalse", "testfalse_then"): + if_condition = testexpr[0] + + else_suite = tree[3] + assert else_suite.kind.startswith("else_suite") + + if len(if_condition) > 1 and if_condition[1].kind.startswith("jmp_"): + if last == n: + last -= 1 + jmp = if_condition[1] + jmp_target = int(jmp[0].pattr) + + # Below we check that jmp_target is jumping to a feasible + # location. It should be to the transition after the "then" + # block and to the beginning of the "else" block. + # However the "if/else" is inside a loop the false test can be + # back to the loop. + + # FIXME: the below logic for jf_cfs could probably be + # simplified. + if tree[2] == "filler": + jump_else_end = tree[3] + else: + jump_else_end = tree[2] + + if jump_else_end == "jf_cfs": + jump_else_end = jump_else_end[0] + + if jump_else_end == "JUMP_FORWARD": + endif_target = int(jump_else_end.pattr) + last_offset = tokens[last].off2int() + if endif_target != last_offset: + return True + last_offset = tokens[last].off2int(prefer_last=False) + if jmp_target <= last_offset: + # jmp_target should be jumping to the end of the if/then/else + # but is it jumping to the beginning of the "else" or before + return True + if ( + jump_else_end in ("jf_cfs", "jump_forward_else") + and jump_else_end[0] == "JUMP_FORWARD" + ): + # If the "else" jump jumps before the end of the the "if .. else end", then this + # is not this kind of "ifelsestmt". 
+ jump_else_forward = jump_else_end[0] + jump_else_forward_target = jump_else_forward.attr + if jump_else_forward_target < last_offset: + return True + pass + if ( + jump_else_end in ("jb_elsec", "jb_elsel", "jf_cfs", "jb_cfs") + and jump_else_end[-1] == "COME_FROM" + ): + if jump_else_end[-1].off2int() != jmp_target: + return True + + if tokens[first].off2int() > jmp_target: + return True + + return (jmp_target > last_offset) and tokens[last] != "JUMP_FORWARD" + + return False diff --git a/uncompyle6/parsers/reducecheck/iflaststmt.py b/uncompyle6/parsers/reducecheck/iflaststmt.py index 93495d24a..0118672e3 100644 --- a/uncompyle6/parsers/reducecheck/iflaststmt.py +++ b/uncompyle6/parsers/reducecheck/iflaststmt.py @@ -1,8 +1,26 @@ -# Copyright (c) 2020 Rocky Bernstein +# Copyright (c) 2020, 2022 Rocky Bernstein +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
-def iflaststmt(self, lhs, n, rule, ast, tokens, first, last): - testexpr = ast[0] +def iflaststmt( + self, lhs: str, n: int, rule, tree, tokens: list, first: int, last: int +) -> bool: + testexpr = tree[0] + + # print("XXX", first, last, rule) + # for t in range(first, last): print(tokens[t]) + # print("="*40) if testexpr[0] in ("testtrue", "testfalse"): diff --git a/uncompyle6/parsers/reducecheck/ifstmt.py b/uncompyle6/parsers/reducecheck/ifstmt.py index d2abe4902..0d21460d8 100644 --- a/uncompyle6/parsers/reducecheck/ifstmt.py +++ b/uncompyle6/parsers/reducecheck/ifstmt.py @@ -1,13 +1,16 @@ -# Copyright (c) 2020 Rocky Bernstein +# Copyright (c) 2020, 2023 Rocky Bernstein def ifstmt(self, lhs, n, rule, ast, tokens, first, last): + + first_offset = tokens[first].off2int(prefer_last=False) + if lhs == "ifstmtl": if last == n: last -= 1 pass if tokens[last].attr and isinstance(tokens[last].attr, int): - if tokens[first].offset >= tokens[last].attr: + if first_offset >= tokens[last].attr: return True pass pass @@ -36,7 +39,7 @@ def ifstmt(self, lhs, n, rule, ast, tokens, first, last): if tokens[l] == "JUMP_FORWARD": return tokens[l].attr != pjif_target return True - elif lhs == "ifstmtl" and tokens[first].off2int() > pjif_target: + elif lhs == "ifstmtl" and first_offset > pjif_target: # A conditional JUMP to the loop is expected for "ifstmtl" return False pass @@ -55,7 +58,7 @@ def ifstmt(self, lhs, n, rule, ast, tokens, first, last): if len(test) > 1 and test[1].kind.startswith("jmp_"): jmp_target = test[1][0].attr if ( - tokens[first].off2int(prefer_last=True) + first_offset <= jmp_target < tokens[last].off2int(prefer_last=False) ): diff --git a/uncompyle6/parsers/reducecheck/ifstmt2.py b/uncompyle6/parsers/reducecheck/ifstmt2.py new file mode 100644 index 000000000..3cf098a0a --- /dev/null +++ b/uncompyle6/parsers/reducecheck/ifstmt2.py @@ -0,0 +1,98 @@ +# Copyright (c) 2022 Rocky Bernstein +""" +If statement reduction check for Python 2.6 (and older?) 
+""" + + +def ifstmt2(self, lhs, n, rule, ast, tokens, first, last): + + # for t in range(first, last): + # print(tokens[t]) + # print("=" * 30) + + if lhs == "ifstmtl": + if last == n: + last -= 1 + pass + if tokens[last].attr and isinstance(tokens[last].attr, int): + if tokens[first].offset >= tokens[last].attr: + return True + pass + pass + + # Make sure jumps don't extend beyond the end of the if statement. + l = last + if l == n: + l -= 1 + if isinstance(tokens[l].offset, str): + last_offset = int(tokens[l].offset.split("_")[0], 10) + else: + last_offset = tokens[l].offset + for i in range(first, l): + t = tokens[i] + # instead of POP_JUMP_IF, should we use op attributes? + if t.kind in ("JUMP_IF_FALSE", "JUMP_IF_TRUE"): + jif_target = int(t.pattr) + target_instr = self.insts[self.offset2inst_index[jif_target]] + if lhs == "iflaststmtl" and target_instr.opname == "JUMP_ABSOLUTE": + jif_target = target_instr.arg + if jif_target > last_offset: + # In come cases, where we have long bytecode, a + # "POP_JUMP_IF_TRUE/FALSE" offset might be too + # large for the instruction; so instead it + # jumps to a JUMP_FORWARD. Allow that here. + if tokens[l] == "JUMP_FORWARD": + return tokens[l].attr != jif_target + return True + elif lhs == "ifstmtl" and tokens[first].off2int() > jif_target: + # A conditional JUMP to the loop is expected for "ifstmtl" + return False + pass + pass + pass + + if ast: + testexpr = ast[0] + + if (last + 1) < n and tokens[last + 1] == "COME_FROM_LOOP": + # iflastsmtl jumped outside of loop. No good. + return True + + if testexpr[0] in ("testtrue", "testfalse"): + test = testexpr[0] + jmp = test[1] + if len(test) > 1 and jmp.kind.startswith("jmp_"): + jmp_target = int(jmp[0].pattr) + if last == len(tokens): + last -= 1 + + if_end_offset = tokens[last].off2int(prefer_last=False) + if ( + tokens[first].off2int(prefer_last=True) + <= jmp_target + < if_end_offset + ): + # In 2.6 (and before?) 
we need to check if the previous instruction + # is a jump to the last token. If so, testexpr is negated? and so + # jmp_target < if_end_offset. + previous_inst_index = self.offset2inst_index[jmp_target] - 1 + previous_inst = self.insts[previous_inst_index] + if previous_inst.opname != "JUMP_ABSOLUTE" and previous_inst.argval != if_end_offset: + return True + # jmp_target less than tokens[first] is okay - is to a loop + # jmp_target equal tokens[last] is also okay: normal non-optimized non-loop jump + if jmp_target > tokens[last].off2int(): + # One more weird case to look out for + # if c1: + # if c2: # Jumps around the *outer* "else" + # ... + # else: + if jmp_target == tokens[last - 1].attr: + return False + if last < n and tokens[last].kind.startswith("JUMP"): + return False + return True + + pass + pass + return False diff --git a/uncompyle6/parsers/reducecheck/ifstmts_jump.py b/uncompyle6/parsers/reducecheck/ifstmts_jump.py index d54469053..ec85d625b 100644 --- a/uncompyle6/parsers/reducecheck/ifstmts_jump.py +++ b/uncompyle6/parsers/reducecheck/ifstmts_jump.py @@ -4,7 +4,6 @@ def ifstmts_jump(self, lhs, n, rule, ast, tokens, first, last): - if len(rule[1]) <= 1 or not ast: return False @@ -24,7 +23,7 @@ def ifstmts_jump(self, lhs, n, rule, ast, tokens, first, last): pop_jump_index -= 1 # FIXME: something is fishy when and EXTENDED ARG is needed before the - # pop_jump_index instruction to get the argment. In this case, the + # pop_jump_index instruction to get the argument. In this case, the # _ifsmtst_jump can jump to a spot beyond the come_froms. 
# That is going on in the non-EXTENDED_ARG case is that the POP_JUMP_IF # jumps to a JUMP_(FORWARD) which is changed into an EXTENDED_ARG POP_JUMP_IF @@ -34,16 +33,11 @@ def ifstmts_jump(self, lhs, n, rule, ast, tokens, first, last): pop_jump_offset = tokens[pop_jump_index].off2int(prefer_last=False) if isinstance(come_froms, Token): - if ( - tokens[pop_jump_index].attr < pop_jump_offset and ast[0] != "pass" - ): + if tokens[pop_jump_index].attr < pop_jump_offset and ast[0] != "pass": # This is a jump backwards to a loop. All bets are off here when there the # unless statement is "pass" which has no instructions associated with it. return False - return ( - come_froms.attr is not None - and pop_jump_offset > come_froms.attr - ) + return come_froms.attr is not None and pop_jump_offset > come_froms.attr elif len(come_froms) == 0: return False diff --git a/uncompyle6/parsers/reducecheck/joined_str_check.py b/uncompyle6/parsers/reducecheck/joined_str_check.py new file mode 100644 index 000000000..c6f67441b --- /dev/null +++ b/uncompyle6/parsers/reducecheck/joined_str_check.py @@ -0,0 +1,47 @@ +# Copyright (c) 2022 Rocky Bernstein +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +def joined_str_invalid( + self, lhs: str, n: int, rule, tree, tokens: list, first: int, last: int +) -> bool: + # In Python 3.8, there is a new "=" specifier. 
+ # See https://docs.python.org/3/whatsnew/3.8.html#f-strings-support-for-self-documenting-expressions-and-debugging + # We detect this here inside joined_str by looking for an + # expr->LOAD_STR which has an "=" added at the end + # and is equal without the "=" to expr->formated_value2->LOAD_CONST + # converted to a string. + expr1 = tree[0] + if expr1 != "expr": + return False + load_str = expr1[0] + if load_str != "LOAD_STR": + return False + format_value_equal = load_str.attr + if format_value_equal[-1] != "=": + return False + expr2 = tree[1] + if expr2 != "expr": + return False + formatted_value = expr2[0] + if not formatted_value.kind.startswith("formatted_value"): + return False + expr2a = formatted_value[0] + if expr2a != "expr": + return False + load_const = expr2a[0] + if load_const == "LOAD_CONST": + format_value2 = load_const.attr + return str(format_value2) == format_value_equal[:-1] + return True diff --git a/uncompyle6/parsers/reducecheck/pop_return.py b/uncompyle6/parsers/reducecheck/pop_return.py new file mode 100644 index 000000000..e9da02645 --- /dev/null +++ b/uncompyle6/parsers/reducecheck/pop_return.py @@ -0,0 +1,10 @@ +# Copyright (c) 2020 Rocky Bernstein + + +def pop_return_check( + self, lhs: str, n: int, rule, ast, tokens: list, first: int, last: int +) -> bool: + # If the first instruction of return_expr (the instruction after POP_TOP) is + # has a linestart, then the POP_TOP was probably part of the previous + # statement, such as a call() where the return value is discarded. 
+ return tokens[first + 1].linestart diff --git a/uncompyle6/parsers/reducecheck/tryexcept.py b/uncompyle6/parsers/reducecheck/tryexcept.py index ffad50619..06097d0a5 100644 --- a/uncompyle6/parsers/reducecheck/tryexcept.py +++ b/uncompyle6/parsers/reducecheck/tryexcept.py @@ -1,16 +1,17 @@ -# Copyright (c) 2020, 2022 Rocky Bernstein +# Copyright (c) 2020, 2022, 2024 Rocky Bernstein + def tryexcept(self, lhs, n, rule, ast, tokens, first, last): come_from_except = ast[-1] if rule == ( - "try_except", - ( - "SETUP_EXCEPT", - "suite_stmts_opt", - "POP_BLOCK", - "except_handler", - "opt_come_from_except", - ), + "try_except", + ( + "SETUP_EXCEPT", + "suite_stmts_opt", + "POP_BLOCK", + "except_handler", + "opt_come_from_except", + ), ): if come_from_except[0] == "COME_FROM": # There should be at least two COME_FROMs, one from an @@ -20,31 +21,31 @@ def tryexcept(self, lhs, n, rule, ast, tokens, first, last): pass elif rule == ( - "try_except", - ( - "SETUP_EXCEPT", - "suite_stmts_opt", - "POP_BLOCK", - "except_handler", - "COME_FROM", - ), + "try_except", + ( + "SETUP_EXCEPT", + "suite_stmts_opt", + "POP_BLOCK", + "except_handler", + "COME_FROM", + ), ): return come_from_except.attr < tokens[first].offset elif rule == ( - 'try_except', - ( - 'SETUP_EXCEPT', - 'suite_stmts_opt', - 'POP_BLOCK', - 'except_handler', - '\\e_opt_come_from_except' - ), + "try_except", + ( + "SETUP_EXCEPT", + "suite_stmts_opt", + "POP_BLOCK", + "except_handler", + "\\e_opt_come_from_except", + ), ): # Find END_FINALLY. for i in range(last, first, -1): if tokens[i] == "END_FINALLY": - jump_before_finally = tokens[i-1] + jump_before_finally = tokens[i - 1] if jump_before_finally.kind.startswith("JUMP"): if jump_before_finally == "JUMP_FORWARD": # If there is a JUMP_FORWARD before @@ -52,7 +53,9 @@ def tryexcept(self, lhs, n, rule, ast, tokens, first, last): # beyond tokens[last].off2int() then # this is a try/else rather than an # try (no else). 
- return tokens[i-1].attr > tokens[last].off2int(prefer_last=True) + return tokens[i - 1].attr > tokens[last].off2int( + prefer_last=True + ) elif jump_before_finally == "JUMP_BACK": # If there is a JUMP_BACK before the # END_FINALLY then this is a looping @@ -61,8 +64,10 @@ def tryexcept(self, lhs, n, rule, ast, tokens, first, last): # jump or this is a try/else rather # than an try (no else). except_handler = ast[3] - if (except_handler == "except_handler" and - except_handler[0] == "JUMP_FORWARD"): + if ( + except_handler == "except_handler" + and except_handler[0] == "JUMP_FORWARD" + ): return True return False pass diff --git a/uncompyle6/parsers/reducecheck/while1stmt.py b/uncompyle6/parsers/reducecheck/while1stmt.py index 43d3b7529..4b4e05ad3 100644 --- a/uncompyle6/parsers/reducecheck/while1stmt.py +++ b/uncompyle6/parsers/reducecheck/while1stmt.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020 Rocky Bernstein +# Copyright (c) 2020, 2022 Rocky Bernstein def while1stmt(self, lhs, n, rule, ast, tokens, first, last): @@ -37,15 +37,14 @@ def while1stmt(self, lhs, n, rule, ast, tokens, first, last): if tokens[loop_end] == "JUMP_BACK": loop_end += 1 loop_end_offset = tokens[loop_end].off2int(prefer_last=False) - for t in range(first+1, loop_end): + for t in range(first + 1, loop_end): token = tokens[t] # token could be a pseudo-op like "LOAD_STR", which is not in - # self.opc. We will replace that with LOAD_CONST as an - # example of an instruction that is not in self.opc.JUMP_OPS - if self.opc.opmap.get(token.kind, "LOAD_CONST") in self.opc.JUMP_OPS: + # token.opc. 
We will replace that with LOAD_CONST as an + # example of an instruction that is not in token.opc.JUMP_OPS + if token.opc.opmap.get(token.kind, "LOAD_CONST") in token.opc.JUMP_OPS: if token.attr >= loop_end_offset: return True - # SETUP_LOOP location must jump either to the last token or the token after the last one return tokens[first].attr not in (offset, offset + 2) diff --git a/uncompyle6/parsers/reducecheck/whilestmt.py b/uncompyle6/parsers/reducecheck/whilestmt.py new file mode 100644 index 000000000..6d8ade009 --- /dev/null +++ b/uncompyle6/parsers/reducecheck/whilestmt.py @@ -0,0 +1,31 @@ +# Copyright (c) 2020 Rocky Bernstein +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +def whilestmt( + self, lhs: str, n: int, rule, tree, tokens: list, first: int, last: int +) -> bool: + # When we are missing a COME_FROM_LOOP, the + # "while" statement is nested inside an if/else + # so after the POP_BLOCK we have a JUMP_FORWARD which forms the "else" portion of the "if" + # Check this. 
+ # print("XXX", first, last, rule) + # for t in range(first, last): print(tokens[t]) + # print("="*40) + + return tokens[last - 1] == "POP_BLOCK" and tokens[last] not in ( + "JUMP_FORWARD", + "COME_FROM_LOOP", + "COME_FROM", + ) diff --git a/uncompyle6/parsers/reducecheck/whilestmt38.py b/uncompyle6/parsers/reducecheck/whilestmt38.py new file mode 100644 index 000000000..d53d82a8d --- /dev/null +++ b/uncompyle6/parsers/reducecheck/whilestmt38.py @@ -0,0 +1,41 @@ +# Copyright (c) 2022 Rocky Bernstein +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +def whilestmt38_check( + self, lhs: str, n: int, rule, ast, tokens: list, first: int, last: int +) -> bool: + # When we are missing a COME_FROM_LOOP, the + # "while" statement is nested inside an if/else + # so after the POP_BLOCK we have a JUMP_FORWARD which forms the "else" portion of the "if" + # Check this. 
+ # print("XXX", first, last, rule) + # for t in range(first, last): + # print(tokens[t]) + # print("=" * 40) + + if tokens[last] != "COME_FROM" and tokens[last - 1] == "COME_FROM": + last -= 1 + if tokens[last - 1].kind.startswith("RAISE_VARARGS"): + return True + while tokens[last] == "COME_FROM": + last -= 1 + # In a "while" loop, (in contrast to "for" loop), the loop jump is + # always to the first offset + first_offset = tokens[first].off2int() + if tokens[last] == "JUMP_LOOP" and ( + tokens[last].attr == first_offset or tokens[last - 1].attr == first_offset + ): + return False + return True diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index 7ef09b6bd..2cf43c18f 100644 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016, 2018-2021 by Rocky Bernstein +# Copyright (c) 2016, 2018-2026 by Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock @@ -21,13 +21,13 @@ scanners, e.g. for Python 2.7 or 3.4. """ -from typing import Optional +import importlib +from abc import ABC from array import array from collections import namedtuple -from sys import intern # noqa +from types import ModuleType +from typing import Optional, Union -from uncompyle6.scanners.tok import Token -from xdis.version_info import IS_PYPY, version_tuple_to_str import xdis from xdis import ( Bytecode, @@ -37,6 +37,10 @@ instruction_size, next_offset, ) +from xdis.op_imports import get_opcode_module +from xdis.version_info import IS_PYPY, PythonImplementation, version_tuple_to_str + +from uncompyle6.scanners.tok import Token # The byte code versions we support. # Note: these all have to be tuples of 2 ints @@ -80,6 +84,7 @@ # FIXME: DRY L65536 = 65536 + def long(num): return num @@ -87,7 +92,7 @@ def long(num): CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT", "CONST_MAP") -class Code(object): +class Code: """ Class for representing code-objects. 
@@ -96,35 +101,119 @@ class Code(object): """ def __init__(self, co, scanner, classname=None, show_asm=None): + # Full initialization is given below, but for linters + # well set up some initial values. + self.co_code = None # Really either bytes for >= 3.0 and string in < 3.0 + for i in dir(co): if i.startswith("co_"): setattr(self, i, getattr(co, i)) self._tokens, self._customize = scanner.ingest(co, classname, show_asm=show_asm) -class Scanner(object): - def __init__(self, version: tuple, show_asm=None, is_pypy=False): - self.version = version +class Scanner(ABC): + def __init__(self, version_tuple: tuple, show_asm=None, is_pypy=False): + self.version = version_tuple self.show_asm = show_asm self.is_pypy = is_pypy - if version[:2] in PYTHON_VERSIONS: - v_str = f"""opcode_{version_tuple_to_str(version, start=0, end=2, delimiter="")}""" - if is_pypy: - v_str += "pypy" - exec(f"""from xdis.opcodes import {v_str}""") - exec("self.opc = %s" % v_str) - else: - raise TypeError( - "%s is not a Python version I know about" - % version_tuple_to_str(version) + # Temporary initialization. + self.opc = ModuleType("uninitialized") + + if version_tuple[:2] in PYTHON_VERSIONS: + v_str = f"""opcode_{version_tuple_to_str(version_tuple, start=0, end=2, delimiter="")}""" + python_implementation = ( + PythonImplementation.PyPy if is_pypy else PythonImplementation.CPython ) + self.opc = get_opcode_module(version_tuple, python_implementation) + else: + raise TypeError("%s is not a Python version I know about" % v_str(version)) self.opname = self.opc.opname # FIXME: This weird Python2 behavior is not Python3 self.resetTokenClass() + def bound_collection_from_tokens(self, tokens, t, i, collection_type): + count = t.attr + assert isinstance(count, int) + + assert count <= i + + if collection_type == "CONST_DICT": + # constant dictionaries work via BUILD_CONST_KEY_MAP and + # handle the values() like sets and lists. + # However, the keys() are an LOAD_CONST of the keys. 
+ # adjust offset to account for this + count += 1 + + # For small lists don't bother + if count < 5: + return None + + collection_start = i - count + + for j in range(collection_start, i): + if tokens[j].kind not in ( + "LOAD_CONST", + "LOAD_FAST", + "LOAD_GLOBAL", + "LOAD_NAME", + ): + return None + + collection_enum = CONST_COLLECTIONS.index(collection_type) + + # If we go there all instructions before tokens[i] are LOAD_CONST and we can replace + # add a boundary marker and change LOAD_CONST to something else + new_tokens = tokens[:-count] + start_offset = tokens[collection_start].offset + new_tokens.append( + Token( + opname="COLLECTION_START", + attr=collection_enum, + pattr=collection_type, + offset="%s_0" % start_offset, + has_arg=True, + opc=self.opc, + has_extended_arg=False, + ) + ) + for j in range(collection_start, i): + if tokens[j] == "LOAD_CONST": + opname = "ADD_VALUE" + op_type = "const" + else: + opname = "ADD_VALUE_VAR" + op_type = "name" + new_tokens.append( + Token( + opname=opname, + attr=tokens[j].attr, + pattr=tokens[j].pattr, + offset=tokens[j].offset, + has_arg=True, + linestart=tokens[j].linestart, + opc=self.opc, + has_extended_arg=False, + optype=op_type, + ) + ) + new_tokens.append( + Token( + opname="BUILD_%s" % collection_type, + attr=t.attr, + pattr=t.pattr, + offset=t.offset, + has_arg=t.has_arg, + linestart=t.linestart, + opc=t.opc, + has_extended_arg=False, + optype="vargs", + ) + ) + return new_tokens + def build_instructions(self, co): """ Create a list of instructions (a structured object rather than @@ -151,11 +240,10 @@ def build_lines_data(self, code_obj): # Offset: lineno pairs, only for offsets which start line. 
# Locally we use list for more convenient iteration using indices - if self.version > (1, 4): - linestarts = list(self.opc.findlinestarts(code_obj)) - else: - linestarts = [[0, 1]] + linestarts = list(self.opc.findlinestarts(code_obj)) self.linestarts = dict(linestarts) + if not self.linestarts: + return [] # 'List-map' which shows line number of current op and offset of # first op on following line, given offset of op as index @@ -209,6 +297,12 @@ def is_jump_forward(self, offset: int) -> bool: return False return offset < self.get_target(offset) + def ingest(self, co, classname=None, code_objects={}, show_asm=None): + """ + Code to tokenize disassembly. Subclasses must implement this. + """ + raise NotImplementedError("This method should have been implemented") + def prev_offset(self, offset: int) -> int: return self.insts[self.offset2inst_index[offset] - 1].offset @@ -243,15 +337,6 @@ def get_argument(self, pos: int): def next_offset(self, op, offset: int) -> int: return xdis.next_offset(op, self.opc, offset) - def print_bytecode(self): - for i in self.op_range(0, len(self.code)): - op = self.code[i] - if op in self.JUMP_OPS: - dest = self.get_target(i, op) - print("%i\t%s\t%i" % (i, self.opname[op], dest)) - else: - print("%i\t%s\t" % (i, self.opname[op])) - def first_instr(self, start: int, end: int, instr, target=None, exact=True): """ Find the first in the block from start to end. 
@@ -353,7 +438,7 @@ def inst_matches(self, start, end, instr, target=None, include_beyond_target=Fal """ try: None in instr - except: + except Exception: instr = [instr] first = self.offset2inst_index[start] @@ -407,7 +492,6 @@ def all_instr( result = [] extended_arg = 0 for offset in self.op_range(start, end): - op = code[offset] if op == self.opc.EXTENDED_ARG: @@ -466,7 +550,6 @@ def remove_extended_args(self, instructions): offset = inst.offset continue if last_was_extarg: - # j = self.stmts.index(inst.offset) # self.lines[j] = offset @@ -519,26 +602,24 @@ def restrict_to_parent(self, target: int, parent) -> int: target = parent["end"] return target - def setTokenClass(self, tokenClass) -> Token: + def setTokenClass(self, tokenClass: Token) -> Token: self.Token = tokenClass return self.Token -def parse_fn_counts(argc): - return ((argc & 0xFF), (argc >> 8) & 0xFF, (argc >> 16) & 0x7FFF) - - -def get_scanner(version, is_pypy=False, show_asm=None): - +def get_scanner(version: Union[str, tuple], is_pypy=False, show_asm=None) -> Scanner: + """ + Import the right scanner module for ``version`` and return the Scanner class + in that module. + """ # If version is a string, turn that into the corresponding float. 
if isinstance(version, str): if version not in canonic_python_version: - raise RuntimeError("Unknown Python version in xdis %s" % version) + raise RuntimeError(f"Unknown Python version in xdis {version}") canonic_version = canonic_python_version[version] if canonic_version not in CANONIC2VERSION: raise RuntimeError( - "Unsupported Python version %s (canonic %s)" - % (version, canonic_version) + f"Unsupported Python version {version} (canonic {canonic_version})" ) version = CANONIC2VERSION[canonic_version] @@ -589,5 +670,6 @@ def get_scanner(version, is_pypy=False, show_asm=None): # scanner = get_scanner('2.7.13', True) # scanner = get_scanner(sys.version[:5], False) from xdis.version_info import PYTHON_VERSION_TRIPLE + scanner = get_scanner(PYTHON_VERSION_TRIPLE, IS_PYPY, True) tokens, customize = scanner.ingest(co, {}, show_asm="after") diff --git a/uncompyle6/scanners/pypy37.py b/uncompyle6/scanners/pypy37.py index 6856c8392..eac8b2e3a 100644 --- a/uncompyle6/scanners/pypy37.py +++ b/uncompyle6/scanners/pypy37.py @@ -13,6 +13,7 @@ JUMP_OPs = opc.JUMP_OPS + # We base this off of 3.7 class ScannerPyPy37(scan.Scanner37): def __init__(self, show_asm): diff --git a/uncompyle6/scanners/pypy38.py b/uncompyle6/scanners/pypy38.py index e60ebeb1d..8f22e3975 100644 --- a/uncompyle6/scanners/pypy38.py +++ b/uncompyle6/scanners/pypy38.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 by Rocky Bernstein +# Copyright (c) 2021-2022 by Rocky Bernstein """ Python PyPy 3.8 decompiler scanner. @@ -6,13 +6,14 @@ make things easier for decompilation. 
""" -import decompyle3.scanners.scanner38 as scan +import uncompyle6.scanners.scanner38 as scan # bytecode verification, verify(), uses JUMP_OPS from here from xdis.opcodes import opcode_38pypy as opc JUMP_OPs = opc.JUMP_OPS + # We base this off of 3.8 class ScannerPyPy38(scan.Scanner38): def __init__(self, show_asm): diff --git a/uncompyle6/scanners/scanner15.py b/uncompyle6/scanners/scanner15.py index 2df854fe5..179dc086b 100644 --- a/uncompyle6/scanners/scanner15.py +++ b/uncompyle6/scanners/scanner15.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016-2018, 2021-2022 by Rocky Bernstein +# Copyright (c) 2016-2018, 2021-2023 by Rocky Bernstein """ Python 1.5 bytecode decompiler massaging. @@ -7,12 +7,15 @@ """ import uncompyle6.scanners.scanner21 as scan + # from uncompyle6.scanners.scanner26 import ingest as ingest26 # bytecode verification, verify(), uses JUMP_OPs from here from xdis.opcodes import opcode_15 + JUMP_OPS = opcode_15.JUMP_OPS + # We base this off of 2.2 instead of the other way around # because we cleaned things up this way. 
# The history is that 2.7 support is the cleanest, @@ -23,7 +26,7 @@ def __init__(self, show_asm=False): self.opc = opcode_15 self.opname = opcode_15.opname self.version = (1, 5) - self.genexpr_name = '' + self.genexpr_name = "" return def ingest(self, co, classname=None, code_objects={}, show_asm=None): @@ -36,18 +39,22 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): Some transformations are made to assist the deparsing grammar: - various types of LOAD_CONST's are categorized in terms of what they load - COME_FROM instructions are added to assist parsing control structures - - operands with stack argument counts or flag masks are appended to the opcode name, e.g.: + - operands with stack argument counts or flag masks are appended to the + opcode name, e.g.: * BUILD_LIST, BUILD_SET - * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments + * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional + arguments - EXTENDED_ARGS instructions are removed - Also, when we encounter certain tokens, we add them to a set which will cause custom - grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST - cause specific rules for the specific number of arguments they take. + Also, when we encounter certain tokens, we add them to a set which will cause + custom grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or + BUILD_LIST cause specific rules for the specific number of arguments they take. 
""" - tokens, customize = scan.Scanner21.ingest(self, co, classname, code_objects, show_asm) + tokens, customize = scan.Scanner21.ingest( + self, co, classname, code_objects, show_asm + ) for t in tokens: if t.op == self.opc.UNPACK_LIST: - t.kind = 'UNPACK_LIST_%d' % t.attr + t.kind = "UNPACK_LIST_%d" % t.attr pass return tokens, customize diff --git a/uncompyle6/scanners/scanner2.py b/uncompyle6/scanners/scanner2.py index 5a0b8090f..f5495b4f9 100644 --- a/uncompyle6/scanners/scanner2.py +++ b/uncompyle6/scanners/scanner2.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2022 by Rocky Bernstein +# Copyright (c) 2015-2025 by Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # @@ -36,12 +36,12 @@ from __future__ import print_function from copy import copy +from sys import intern -from xdis import code2num, iscode, op_has_argument, instruction_size -from xdis.bytecode import _get_const_info -from uncompyle6.scanner import Scanner, Token +from xdis import code2num, instruction_size, iscode, op_has_argument +from xdis.bytecode import _get_const_info, get_optype -from sys import intern +from uncompyle6.scanner import Scanner, Token class Scanner2(Scanner): @@ -55,7 +55,7 @@ def __init__(self, version, show_asm=None, is_pypy=False): self.load_asserts = set([]) # Create opcode classification sets - # Note: super initilization above initializes self.opc + # Note: super initialization above initializes self.opc # Ops that start SETUP_ ... We will COME_FROM with these names # Some blocks and END_ statements. And they can start @@ -200,19 +200,21 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST cause specific rules for the specific number of arguments they take. 
""" - if not show_asm: show_asm = self.show_asm bytecode = self.build_instructions(co) - # show_asm = 'after' if show_asm in ("both", "before"): - for instr in bytecode.get_instructions(co): - print(instr.disassemble()) + print("\n# ---- disassembly:") + bytecode.disassemble_bytes( + co, + line_starts=bytecode._linestarts, + asm_format="extended", + ) # list of tokens/instructions - tokens = [] + new_tokens = [] # "customize" is in the process of going away here customize = {} @@ -229,7 +231,6 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): # 'LOAD_ASSERT' is used in assert statements. self.load_asserts = set() for i in self.op_range(0, codelen): - # We need to detect the difference between: # raise AssertionError # and @@ -289,7 +290,7 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): if come_from_type not in ("LOOP", "EXCEPT"): come_from_name = "COME_FROM_%s" % come_from_type pass - tokens.append( + new_tokens.append( Token( come_from_name, jump_offset, @@ -303,6 +304,7 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): op = self.code[offset] op_name = self.op_name(op) + op_type = get_optype(op, self.opc) oparg = None pattr = None @@ -313,6 +315,31 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): if op == self.opc.EXTENDED_ARG: extended_arg += self.extended_arg_val(oparg) continue + + # Note: name used to match on rather than op since + # BUILD_SET isn't in earlier Pythons. 
+ if op_name in ( + "BUILD_LIST", + "BUILD_SET", + ): + t = Token( + op_name, + oparg, + pattr, + offset, + self.linestarts.get(offset, None), + op, + has_arg, + self.opc, + ) + collection_type = op_name.split("_")[1] + next_tokens = self.bound_collection_from_tokens( + new_tokens, t, len(new_tokens), "CONST_%s" % collection_type + ) + if next_tokens is not None: + new_tokens = next_tokens + continue + if op in self.opc.CONST_OPS: const = co.co_consts[oparg] if iscode(const): @@ -347,12 +374,12 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): elif op in self.opc.JREL_OPS: # use instead: hasattr(self, 'patch_continue'): ? if self.version[:2] == (2, 7): - self.patch_continue(tokens, offset, op) + self.patch_continue(new_tokens, offset, op) pattr = repr(offset + 3 + oparg) elif op in self.opc.JABS_OPS: # use instead: hasattr(self, 'patch_continue'): ? if self.version[:2] == (2, 7): - self.patch_continue(tokens, offset, op) + self.patch_continue(new_tokens, offset, op) pattr = repr(oparg) elif op in self.opc.LOCAL_OPS: pattr = varnames[oparg] @@ -400,7 +427,16 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): # either to a FOR_ITER or the instruction after a SETUP_LOOP # and it is followed by another JUMP_FORWARD # then we'll take it as a "continue". - j = self.offset2inst_index[offset] + j = self.offset2inst_index.get(offset) + + # EXTENDED_ARG doesn't appear in instructions, + # but is instead the next opcode folded into it, and has the offset + # of the EXTENDED_ARG. Therefore in self.offset2nist_index we'll find + # the instruction at the previous EXTENDED_ARG offset which is 3 + # bytes back. 
+ if j is None and offset > self.opc.ARG_MAX_VALUE: + j = self.offset2inst_index[offset - 3] + target_index = self.offset2inst_index[target] is_continue = ( self.insts[target_index - 1].opname == "SETUP_LOOP" @@ -433,13 +469,21 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): linestart = self.linestarts.get(offset, None) if offset not in replace: - tokens.append( + new_tokens.append( Token( - op_name, oparg, pattr, offset, linestart, op, has_arg, self.opc + op_name, + oparg, + pattr, + offset, + linestart, + op, + has_arg, + self.opc, + optype=op_type, ) ) else: - tokens.append( + new_tokens.append( Token( replace[offset], oparg, @@ -449,16 +493,19 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): op, has_arg, self.opc, + optype=op_type, ) ) pass pass if show_asm in ("both", "after"): - for t in tokens: + print("\n# ---- tokenization:") + # FIXME: t.format() is changing tokens! + for t in new_tokens.copy(): print(t.format(line_prefix="")) print() - return tokens, customize + return new_tokens, customize def build_statement_indices(self): code = self.code @@ -504,14 +551,17 @@ def build_statement_indices(self): for s in stmt_list: if code[s] == self.opc.JUMP_ABSOLUTE and s not in pass_stmts: target = self.get_target(s) - if target > s or self.lines[last_stmt].l_no == self.lines[s].l_no: + if target > s or ( + self.lines and self.lines[last_stmt].l_no == self.lines[s].l_no + ): stmts.remove(s) continue j = self.prev[s] while code[j] == self.opc.JUMP_ABSOLUTE: j = self.prev[j] if ( - self.version >= (2, 3) and self.opname_for_offset(j) == "LIST_APPEND" + self.version >= (2, 3) + and self.opname_for_offset(j) == "LIST_APPEND" ): # list comprehension stmts.remove(s) continue @@ -605,6 +655,7 @@ def detect_control_flow(self, offset, op, extended_arg): parent = self.structs[0] start = parent["start"] end = parent["end"] + next_line_byte = end # Pick inner-most parent for our offset for struct in self.structs: @@ -633,7 +684,8 
@@ def detect_control_flow(self, offset, op, extended_arg): if setup_target != loop_end_offset: self.fixed_jumps[offset] = loop_end_offset - (line_no, next_line_byte) = self.lines[offset] + if self.lines: + (line_no, next_line_byte) = self.lines[offset] # jump_back_offset is the instruction after the SETUP_LOOP # where we iterate back to. @@ -886,8 +938,7 @@ def detect_control_flow(self, offset, op, extended_arg): # Is it an "and" inside an "if" or "while" block if op == self.opc.PJIF: - - # Search for other POP_JUMP_IF_...'s targetting the + # Search for other POP_JUMP_IF_...'s targeting the # same target, of the current POP_JUMP_... instruction, # starting from current offset, and filter everything inside inner 'or' # jumps and mid-line ifs @@ -986,7 +1037,7 @@ def detect_control_flow(self, offset, op, extended_arg): ): self.fixed_jumps[offset] = rtarget else: - # note test for < 2.7 might be superflous although informative + # note test for < 2.7 might be superfluous although informative # for 2.7 a different branch is taken and the below code is handled # under: elif op in self.pop_jump_if_or_pop # below @@ -1076,9 +1127,8 @@ def detect_control_flow(self, offset, op, extended_arg): if code_pre_rtarget in self.jump_forward: if_end = self.get_target(pre_rtarget) - # Is this a loop and not an "if" statment? + # Is this a loop and not an "if" statement? if (if_end < pre_rtarget) and (pre[if_end] in self.setup_loop_targets): - if if_end > start: return else: @@ -1299,9 +1349,9 @@ def find_jump_targets(self, debug): # FIXME FIXME FIXME # All the conditions are horrible, and I am not sure I - # undestand fully what's going l + # understand fully what's going l # We REALLY REALLY need a better way to handle control flow - # Expecially for < 2.7 + # Especially for < 2.7 if label is not None and label != -1: if self.version[:2] == (2, 7): # FIXME: rocky: I think we need something like this... 
diff --git a/uncompyle6/scanners/scanner26.py b/uncompyle6/scanners/scanner26.py index a24fccc11..81adb0930 100755 --- a/uncompyle6/scanners/scanner26.py +++ b/uncompyle6/scanners/scanner26.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2017, 2021-2022 by Rocky Bernstein +# Copyright (c) 2015-2017, 2021-2022, 2024 by Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # @@ -23,33 +23,36 @@ """ import sys -import uncompyle6.scanners.scanner2 as scan -from uncompyle6.scanner import L65536 # bytecode verification, verify(), uses JUMP_OPs from here -from xdis.opcodes import opcode_26 +from xdis import iscode from xdis.bytecode import _get_const_info +from xdis.opcodes import opcode_26 +import uncompyle6.scanners.scanner2 as scan from uncompyle6.scanner import Token intern = sys.intern JUMP_OPS = opcode_26.JUMP_OPS + class Scanner26(scan.Scanner2): def __init__(self, show_asm=False): super(Scanner26, self).__init__((2, 6), show_asm) # "setup" opcodes - self.setup_ops = frozenset([ - self.opc.SETUP_EXCEPT, self.opc.SETUP_FINALLY, - ]) + self.setup_ops = frozenset( + [ + self.opc.SETUP_EXCEPT, + self.opc.SETUP_FINALLY, + ] + ) return def ingest(self, co, classname=None, code_objects={}, show_asm=None): - """ - Create "tokens" the bytecode of an Python code object. Largely these + """Create "tokens" the bytecode of an Python code object. Largely these are the opcode name, but in some cases that has been modified to make parsing easier. returning a list of uncompyle6 Token's. 
@@ -57,14 +60,17 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): Some transformations are made to assist the deparsing grammar: - various types of LOAD_CONST's are categorized in terms of what they load - COME_FROM instructions are added to assist parsing control structures - - operands with stack argument counts or flag masks are appended to the opcode name, e.g.: + - operands with stack argument counts or flag masks are appended to the + opcode name, e.g.: * BUILD_LIST, BUILD_SET - * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments + * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional + arguments - EXTENDED_ARGS instructions are removed - Also, when we encounter certain tokens, we add them to a set which will cause custom - grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST - cause specific rules for the specific number of arguments they take. + Also, when we encounter certain tokens, we add them to a set + which will cause custom grammar rules. Specifically, variable + arg tokens like MAKE_FUNCTION or BUILD_LIST cause specific + rules for the specific number of arguments they take. """ if not show_asm: @@ -73,16 +79,19 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): bytecode = self.build_instructions(co) # show_asm = 'after' - if show_asm in ('both', 'before'): - for instr in bytecode.get_instructions(co): - print(instr.disassemble()) - + if show_asm in ("both", "before"): + print("\n# ---- disassembly:") + bytecode.disassemble_bytes( + co, + line_starts=bytecode._linestarts, + asm_format="extended", + ) # Container for tokens tokens = [] customize = {} if self.is_pypy: - customize['PyPy'] = 1 + customize["PyPy"] = 0 codelen = len(self.code) @@ -98,12 +107,14 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): # raise AssertionError # and # assert ... 
- if (self.code[i] == self.opc.JUMP_IF_TRUE and - i + 4 < codelen and - self.code[i+3] == self.opc.POP_TOP and - self.code[i+4] == self.opc.LOAD_GLOBAL): - if names[self.get_argument(i+4)] == 'AssertionError': - self.load_asserts.add(i+4) + if ( + self.code[i] == self.opc.JUMP_IF_TRUE + and i + 4 < codelen + and self.code[i + 3] == self.opc.POP_TOP + and self.code[i + 4] == self.opc.LOAD_GLOBAL + ): + if names[self.get_argument(i + 4)] == "AssertionError": + self.load_asserts.add(i + 4) jump_targets = self.find_jump_targets(show_asm) # contains (code, [addrRefToCode]) @@ -112,21 +123,24 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): i = self.next_stmt[last_stmt] replace = {} while i < codelen - 1: - if self.lines[last_stmt].next > i: + if self.lines and self.lines[last_stmt].next > i: # Distinguish "print ..." from "print ...," if self.code[last_stmt] == self.opc.PRINT_ITEM: if self.code[i] == self.opc.PRINT_ITEM: - replace[i] = 'PRINT_ITEM_CONT' + replace[i] = "PRINT_ITEM_CONT" elif self.code[i] == self.opc.PRINT_NEWLINE: - replace[i] = 'PRINT_NEWLINE_CONT' + replace[i] = "PRINT_NEWLINE_CONT" last_stmt = i i = self.next_stmt[i] extended_arg = 0 + i = -1 for offset in self.op_range(0, codelen): + i += 1 op = self.code[offset] op_name = self.opname[op] - oparg = None; pattr = None + oparg = None + pattr = None if offset in jump_targets: jump_idx = 0 @@ -137,52 +151,84 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): # properly. For example, a "loop" with an "if" nested in it should have the # "loop" tag last so the grammar rule matches that properly. 
last_jump_offset = -1 - for jump_offset in sorted(jump_targets[offset], reverse=True): + for jump_offset in sorted(jump_targets[offset], reverse=True): if jump_offset != last_jump_offset: - tokens.append(Token( - 'COME_FROM', jump_offset, repr(jump_offset), - offset="%s_%d" % (offset, jump_idx), - has_arg = True)) + tokens.append( + Token( + "COME_FROM", + jump_offset, + repr(jump_offset), + offset="%s_%d" % (offset, jump_idx), + has_arg=True, + ) + ) jump_idx += 1 last_jump_offset = jump_offset elif offset in self.thens: - tokens.append(Token( - 'THEN', None, self.thens[offset], - offset="%s_0" % offset, - has_arg = True)) - - has_arg = (op >= self.opc.HAVE_ARGUMENT) + tokens.append( + Token( + "THEN", + None, + self.thens[offset], + offset="%s_0" % offset, + has_arg=True, + ) + ) + + has_arg = op >= self.opc.HAVE_ARGUMENT if has_arg: oparg = self.get_argument(offset) + extended_arg extended_arg = 0 if op == self.opc.EXTENDED_ARG: - extended_arg = oparg * L65536 + extended_arg += self.extended_arg_val(oparg) continue + + # Note: name used to match on rather than op since + # BUILD_SET isn't in earlier Pythons. + if op_name in ( + "BUILD_LIST", + "BUILD_SET", + ): + t = Token( + op_name, + oparg, + pattr, + offset, + self.linestarts.get(offset, None), + op, + has_arg, + self.opc, + ) + + collection_type = op_name.split("_")[1] + next_tokens = self.bound_collection_from_tokens( + tokens, t, len(tokens), "CONST_%s" % collection_type + ) + if next_tokens is not None: + tokens = next_tokens + continue + if op in self.opc.CONST_OPS: const = co.co_consts[oparg] - # We can't use inspect.iscode() because we may be - # using a different version of Python than the - # one that this was byte-compiled on. So the code - # types may mismatch. 
- if hasattr(const, 'co_name'): + if iscode(const): oparg = const - if const.co_name == '': - assert op_name == 'LOAD_CONST' - op_name = 'LOAD_LAMBDA' + if const.co_name == "": + assert op_name == "LOAD_CONST" + op_name = "LOAD_LAMBDA" elif const.co_name == self.genexpr_name: - op_name = 'LOAD_GENEXPR' - elif const.co_name == '': - op_name = 'LOAD_DICTCOMP' - elif const.co_name == '': - op_name = 'LOAD_SETCOMP' + op_name = "LOAD_GENEXPR" + elif const.co_name == "": + op_name = "LOAD_DICTCOMP" + elif const.co_name == "": + op_name = "LOAD_SETCOMP" else: op_name = "LOAD_CODE" - # verify uses 'pattr' for comparison, since 'attr' + # verify() uses 'pattr' for comparison, since 'attr' # now holds Code(const) and thus can not be used # for comparison (todo: think about changing this) - # pattr = 'code_object @ 0x%x %s->%s' % \ + # pattr = 'code_object @ 0x%x %s->%s' %\ # (id(const), const.co_filename, const.co_name) - pattr = '' + pattr = "" else: if oparg < len(co.co_consts): argval, _ = _get_const_info(oparg, co.co_consts) @@ -200,32 +246,42 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): # FIXME: this is a hack to catch stuff like: # if x: continue # the "continue" is not on a new line. - if len(tokens) and tokens[-1].kind == 'JUMP_BACK': - tokens[-1].kind = intern('CONTINUE') + if len(tokens) and tokens[-1].kind == "JUMP_BACK": + tokens[-1].kind = intern("CONTINUE") elif op in self.opc.JABS_OPS: pattr = repr(oparg) elif op in self.opc.LOCAL_OPS: - pattr = varnames[oparg] + if self.version < (1, 5): + pattr = names[oparg] + else: + pattr = varnames[oparg] elif op in self.opc.COMPARE_OPS: pattr = self.opc.cmp_op[oparg] elif op in self.opc.FREE_OPS: pattr = free[oparg] + if op in self.varargs_ops: # CE - Hack for >= 2.5 # Now all values loaded via LOAD_CLOSURE are packed into # a tuple before calling MAKE_CLOSURE. 
- if (self.version >= (2, 5) and op == self.opc.BUILD_TUPLE and - self.code[self.prev[offset]] == self.opc.LOAD_CLOSURE): + if ( + self.version >= (2, 5) + and op == self.opc.BUILD_TUPLE + and self.code[self.prev[offset]] == self.opc.LOAD_CLOSURE + ): continue else: - op_name = '%s_%d' % (op_name, oparg) + op_name = "%s_%d" % (op_name, oparg) customize[op_name] = oparg elif self.version > (2, 0) and op == self.opc.CONTINUE_LOOP: customize[op_name] = 0 - elif op_name in """ + elif ( + op_name + in """ CONTINUE_LOOP EXEC_STMT LOAD_LISTCOMP LOAD_SETCOMP - """.split(): + """.split() + ): customize[op_name] = 0 elif op == self.opc.JUMP_ABSOLUTE: # Further classify JUMP_ABSOLUTE into backward jumps @@ -241,46 +297,60 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): # rule for that. target = self.get_target(offset) if target <= offset: - op_name = 'JUMP_BACK' - if (offset in self.stmts - and self.code[offset+3] not in (self.opc.END_FINALLY, - self.opc.POP_BLOCK)): - if ((offset in self.linestarts and - tokens[-1].kind == 'JUMP_BACK') - or offset not in self.not_continue): - op_name = 'CONTINUE' + op_name = "JUMP_BACK" + if offset in self.stmts and self.code[offset + 3] not in ( + self.opc.END_FINALLY, + self.opc.POP_BLOCK, + ): + if ( + offset in self.linestarts and tokens[-1].kind == "JUMP_BACK" + ) or offset not in self.not_continue: + op_name = "CONTINUE" else: # FIXME: this is a hack to catch stuff like: # if x: continue # the "continue" is not on a new line. - if tokens[-1].kind == 'JUMP_BACK': + if tokens[-1].kind == "JUMP_BACK": # We need 'intern' since we have # already have processed the previous # token. 
- tokens[-1].kind = intern('CONTINUE') + tokens[-1].kind = intern("CONTINUE") elif op == self.opc.LOAD_GLOBAL: if offset in self.load_asserts: - op_name = 'LOAD_ASSERT' + op_name = "LOAD_ASSERT" elif op == self.opc.RETURN_VALUE: if offset in self.return_end_ifs: - op_name = 'RETURN_END_IF' + op_name = "RETURN_END_IF" linestart = self.linestarts.get(offset, None) if offset not in replace: - tokens.append(Token( - op_name, oparg, pattr, offset, linestart, op, - has_arg, self.opc)) + tokens.append( + Token( + op_name, oparg, pattr, offset, linestart, op, has_arg, self.opc + ) + ) else: - tokens.append(Token( - replace[offset], oparg, pattr, offset, linestart, op, - has_arg, self.opc)) + tokens.append( + Token( + replace[offset], + oparg, + pattr, + offset, + linestart, + op, + has_arg, + self.opc, + ) + ) pass pass - if show_asm in ('both', 'after'): - for t in tokens: + if show_asm in ("both", "after"): + print("\n# ---- tokenization:") + # FIXME: t.format() is changing tokens! + for t in tokens.copy(): print(t.format(line_prefix="")) print() return tokens, customize diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 0fe6a2c2d..a2703eba9 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2019, 2021-2022 by Rocky Bernstein +# Copyright (c) 2015-2019, 2021-2024, 2026 by Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # @@ -35,21 +35,20 @@ from __future__ import print_function +import sys from typing import Optional, Tuple -from xdis import iscode, instruction_size, Instruction -from xdis.bytecode import _get_const_info - -from uncompyle6.scanners.tok import Token -from uncompyle6.scanner import parse_fn_counts import xdis +from xdis import Instruction, instruction_size, iscode +from xdis.bytecode import _get_const_info # Get all the opcodes into globals -import xdis.opcodes.opcode_33 as op3 - -from uncompyle6.scanner 
import Scanner, CONST_COLLECTIONS +from xdis.opcodes import opcode_33 as op3 +from xdis.opcodes.opcode_3x.opcode_3x import parse_fn_counts_30_35 -import sys +from uncompyle6.scanner import CONST_COLLECTIONS, Scanner +from uncompyle6.scanners.tok import Token +from uncompyle6.util import get_code_name intern = sys.intern @@ -61,7 +60,7 @@ def __init__(self, version, show_asm=None, is_pypy=False): super(Scanner3, self).__init__(version, show_asm, is_pypy) # Create opcode classification sets - # Note: super initilization above initializes self.opc + # Note: super initialization above initializes self.opc # For ops that start SETUP_ ... we will add COME_FROM with these names # at the their targets. @@ -208,11 +207,18 @@ def __init__(self, version, show_asm=None, is_pypy=False): return def bound_collection_from_inst( - self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int, collection_type: str + self, + insts: list, + next_tokens: list, + inst: Instruction, + t: Token, + i: int, + collection_type: str, ) -> Optional[list]: """ - Try to a replace sequence of instruction that ends with a BUILD_xxx with a sequence that can - be parsed much faster, but inserting the token boundary at the beginning of the sequence. + Try to replace a sequence of instruction that ends with a + BUILD_xxx with a sequence that can be parsed much faster, but + inserting the token boundary at the beginning of the sequence. """ count = t.attr assert isinstance(count, int) @@ -220,7 +226,7 @@ def bound_collection_from_inst( assert count <= i if collection_type == "CONST_DICT": - # constant dictonaries work via BUILD_CONST_KEY_MAP and + # constant dictionaries work via BUILD_CONST_KEY_MAP and # handle the values() like sets and lists. # However the keys() are an LOAD_CONST of the keys. 
# adjust offset to account for this @@ -260,6 +266,7 @@ def bound_collection_from_inst( has_arg=True, has_extended_arg=False, opc=self.opc, + optype="pseudo", ) ) for j in range(collection_start, i): @@ -273,6 +280,7 @@ def bound_collection_from_inst( has_arg=True, has_extended_arg=False, opc=self.opc, + optype=insts[j].optype, ) ) new_tokens.append( @@ -285,15 +293,19 @@ def bound_collection_from_inst( has_arg=t.has_arg, has_extended_arg=False, opc=t.opc, + optype="pseudo", ) ) return new_tokens - def bound_map_from_inst( - self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int) -> Optional[list]: + # Move to scanner35? + def bound_map_from_inst_35( + self, insts: list, next_tokens: list, t: Token, i: int + ) -> Optional[list]: """ - Try to a sequence of instruction that ends with a BUILD_MAP into a sequence that can - be parsed much faster, but inserting the token boundary at the beginning of the sequence. + Try to a sequence of instruction that ends with a BUILD_MAP into + a sequence that can be parsed much faster, but inserting the + token boundary at the beginning of the sequence. """ count = t.attr assert isinstance(count, int) @@ -304,25 +316,24 @@ def bound_map_from_inst( if count < 5: return None + # Newer Python BUILD_MAP argument's count is a + # key and value pair so it is multiplied by two. 
collection_start = i - (count * 2) assert (count * 2) <= i for j in range(collection_start, i, 2): - if insts[j].opname not in ( - "LOAD_CONST", - ): + if insts[j].opname not in ("LOAD_CONST",): return None - if insts[j+1].opname not in ( - "LOAD_CONST", - ): + if insts[j + 1].opname not in ("LOAD_CONST",): return None collection_start = i - (2 * count) collection_enum = CONST_COLLECTIONS.index("CONST_MAP") - # If we get here, all instructions before tokens[i] are LOAD_CONST and we can replace - # add a boundary marker and change LOAD_CONST to something else - new_tokens = next_tokens[:-(2*count)] + # If we get here, all instructions before tokens[i] are LOAD_CONST and + # we can replace add a boundary marker and change LOAD_CONST to + # something else. + new_tokens = next_tokens[: -(2 * count)] start_offset = insts[collection_start].offset new_tokens.append( Token( @@ -330,10 +341,11 @@ def bound_map_from_inst( attr=collection_enum, pattr="CONST_MAP", offset=f"{start_offset}_0", - linestart=False, + linestart=insts[collection_start].starts_line, has_arg=True, has_extended_arg=False, opc=self.opc, + optype="pseudo", ) ) for j in range(collection_start, i, 2): @@ -347,23 +359,25 @@ def bound_map_from_inst( has_arg=True, has_extended_arg=False, opc=self.opc, + optype="pseudo", ) ) new_tokens.append( Token( opname="ADD_VALUE", - attr=insts[j+1].argval, - pattr=insts[j+1].argrepr, - offset=insts[j+1].offset, - linestart=insts[j+1].starts_line, + attr=insts[j + 1].argval, + pattr=insts[j + 1].argrepr, + offset=insts[j + 1].offset, + linestart=insts[j + 1].starts_line, has_arg=True, has_extended_arg=False, opc=self.opc, + optype="pseudo", ) ) new_tokens.append( Token( - opname=f"BUILD_DICT_OLDER", + opname="BUILD_DICT_OLDER", attr=t.attr, pattr=t.pattr, offset=t.offset, @@ -371,12 +385,14 @@ def bound_map_from_inst( has_arg=t.has_arg, has_extended_arg=False, opc=t.opc, + optype="pseudo", ) ) return new_tokens - def ingest(self, co, classname=None, code_objects={}, 
show_asm=None - ) -> Tuple[list, dict]: + def ingest( + self, co, classname=None, code_objects={}, show_asm=None + ) -> Tuple[list, dict]: """ Create "tokens" the bytecode of an Python code object. Largely these are the opcode name, but in some cases that has been modified to make parsing @@ -386,14 +402,17 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None Some transformations are made to assist the deparsing grammar: - various types of LOAD_CONST's are categorized in terms of what they load - COME_FROM instructions are added to assist parsing control structures - - operands with stack argument counts or flag masks are appended to the opcode name, e.g.: + - operands with stack argument counts or flag masks are appended to the + opcode name, e.g.: * BUILD_LIST, BUILD_SET - * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments + * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional + arguments - EXTENDED_ARGS instructions are removed - Also, when we encounter certain tokens, we add them to a set which will cause custom - grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST - cause specific rules for the specific number of arguments they take. + Also, when we encounter certain tokens, we add them to a set + which will cause custom grammar rules. Specifically, variable + arg tokens like MAKE_FUNCTION or BUILD_LIST cause specific rules + for the specific number of arguments they take. 
""" if not show_asm: @@ -403,8 +422,12 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None # show_asm = 'both' if show_asm in ("both", "before"): - for instr in bytecode.get_instructions(co): - print(instr.disassemble()) + print("\n# ---- disassembly:") + bytecode.disassemble_bytes( + co, + line_starts=bytecode._linestarts, + asm_format="extended", + ) # "customize" is in the process of going away here customize = {} @@ -419,7 +442,6 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None n = len(self.insts) for i, inst in enumerate(self.insts): - opname = inst.opname # We need to detect the difference between: # raise AssertionError @@ -436,12 +458,12 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None prev_inst = self.insts[i - 1] assert_can_follow = ( prev_inst.opname in ("JUMP_IF_TRUE", "JUMP_IF_FALSE") - and i + 1 < n ) + and i + 1 < n + ) jump_if_inst = prev_inst else: assert_can_follow = ( - opname in ("POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE") - and i + 1 < n + opname in ("POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE") and i + 1 < n ) jump_if_inst = inst if assert_can_follow: @@ -451,7 +473,9 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None and next_inst.argval == "AssertionError" and jump_if_inst.argval ): - raise_idx = self.offset2inst_index[self.prev_op[jump_if_inst.argval]] + raise_idx = self.offset2inst_index[ + self.prev_op[jump_if_inst.argval] + ] raise_inst = self.insts[raise_idx] if raise_inst.opname.startswith("RAISE_VARARGS"): self.load_asserts.add(next_inst.offset) @@ -466,23 +490,31 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None last_op_was_break = False new_tokens = [] + skip_end_offset = None + for i, inst in enumerate(self.insts): + # BUILD_MAP for < 3.5 can skip *forward* in instructions and + # replace them. 
So we use the below to get up to the position + # scanned and replaced forward + if skip_end_offset and inst.offset <= skip_end_offset: + continue + skip_end_offset = None opname = inst.opname argval = inst.argval pattr = inst.argrepr t = Token( - opname=opname, - attr=argval, - pattr=pattr, - offset=inst.offset, - linestart=inst.starts_line, - op=inst.opcode, - has_arg=inst.has_arg, - has_extended_arg=inst.has_extended_arg, - opc=self.opc, - ) + opname=opname, + attr=argval, + pattr=pattr, + offset=inst.offset, + linestart=inst.starts_line, + op=inst.opcode, + has_arg=inst.has_arg, + has_extended_arg=inst.has_extended_arg, + opc=self.opc, + ) # things that smash new_tokens like BUILD_LIST have to come first. if opname in ( @@ -501,24 +533,29 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None if try_tokens is not None: new_tokens = try_tokens continue - elif opname in ( - "BUILD_MAP", - ): - try_tokens = self.bound_map_from_inst( - self.insts, new_tokens, inst, t, i, - ) - if try_tokens is not None: - new_tokens = try_tokens - continue + + elif opname in ("BUILD_MAP",): + if self.version >= (3, 5): + try_tokens = self.bound_map_from_inst_35( + self.insts, + new_tokens, + t, + i, + ) + if try_tokens is not None: + new_tokens = try_tokens + continue + pass + pass + pass argval = inst.argval op = inst.opcode if opname == "EXTENDED_ARG": - # FIXME: The EXTENDED_ARG is used to signal annotation - # parameters - if i + 1 < n and self.insts[i + 1].opcode != self.opc.MAKE_FUNCTION: - continue + # EXTEND_ARG adjustments to the operand value should have + # already been accounted for in xdis instruction creation. 
+ continue if inst.offset in jump_targets: jump_idx = 0 @@ -572,16 +609,17 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None if op in self.opc.CONST_OPS: const = argval if iscode(const): - if const.co_name == "": + co_name = get_code_name(const) + if co_name == "": assert opname == "LOAD_CONST" opname = "LOAD_LAMBDA" - elif const.co_name == "": + elif co_name == "": opname = "LOAD_GENEXPR" - elif const.co_name == "": + elif co_name == "": opname = "LOAD_DICTCOMP" - elif const.co_name == "": + elif co_name == "": opname = "LOAD_SETCOMP" - elif const.co_name == "": + elif co_name == "": opname = "LOAD_LISTCOMP" else: opname = "LOAD_CODE" @@ -589,8 +627,8 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None # now holds Code(const) and thus can not be used # for comparison (todo: think about changing this) # pattr = 'code_object @ 0x%x %s->%s' %\ - # (id(const), const.co_filename, const.co_name) - pattr = "" + # (id(const), const.co_filename, co_name) + pattr = "" elif isinstance(const, str): opname = "LOAD_STR" else: @@ -601,6 +639,10 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None # other parts like n_LOAD_CONST in pysource.py for example. 
pattr = const pass + elif opname == "LOAD_FAST" and argval == ".0": + # Used as the parameter of a list expression + opname = "LOAD_ARG" + elif opname in ("MAKE_FUNCTION", "MAKE_CLOSURE"): if self.version >= (3, 6): # 3.6+ doesn't have MAKE_CLOSURE, so opname == 'MAKE_FUNCTION' @@ -619,23 +661,29 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None flags >>= 1 attr = attr[:4] # remove last value: attr[5] == False else: - pos_args, name_pair_args, annotate_args = parse_fn_counts( + pos_args, name_pair_args, annotate_args = parse_fn_counts_30_35( inst.argval ) - pattr = "%d positional, %d keyword only, %d annotated" % ( - pos_args, - name_pair_args, - annotate_args, - ) - if name_pair_args > 0: + + pattr = f"{pos_args} positional, {name_pair_args} keyword only, {annotate_args} annotated" + + if name_pair_args > 0 and annotate_args > 0: # FIXME: this should probably be K_ - opname = "%s_N%d" % (opname, name_pair_args) + opname += f"_N{name_pair_args}_A{annotate_args}" pass - if annotate_args > 0: - opname = "%s_A_%d" % (opname, annotate_args) + elif annotate_args > 0: + opname += f"_A_{annotate_args}" pass - opname = "%s_%d" % (opname, pos_args) + elif name_pair_args > 0: + opname += f"_N_{name_pair_args}" + pass + else: + # Rule customization mathics, MAKE_FUNCTION_... 
+ # so make sure to add the "_" + opname += "_0" + attr = (pos_args, name_pair_args, annotate_args) + new_tokens.append( Token( opname=opname, @@ -701,11 +749,13 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None and self.insts[i + 1].opname == "JUMP_FORWARD" ) - if (self.version[:2] == (3, 0) and self.insts[i + 1].opname == "JUMP_FORWARD" - and not is_continue): + if ( + self.version[:2] == (3, 0) + and self.insts[i + 1].opname == "JUMP_FORWARD" + and not is_continue + ): target_prev = self.offset2inst_index[self.prev_op[target]] - is_continue = ( - self.insts[target_prev].opname == "SETUP_LOOP") + is_continue = self.insts[target_prev].opname == "SETUP_LOOP" if is_continue or ( inst.offset in self.stmts @@ -722,7 +772,10 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None # the "continue" is not on a new line. # There are other situations where we don't catch # CONTINUE as well. - if new_tokens[-1].kind == "JUMP_BACK" and new_tokens[-1].attr <= argval: + if ( + new_tokens[-1].kind == "JUMP_BACK" + and new_tokens[-1].attr <= argval + ): if new_tokens[-2].kind == "BREAK_LOOP": del new_tokens[-1] else: @@ -748,7 +801,9 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None pass if show_asm in ("both", "after"): - for t in new_tokens: + print("\n# ---- tokenization:") + # FIXME: t.format() is changing tokens! 
+ for t in new_tokens.copy(): print(t.format(line_prefix="")) print() return new_tokens, customize @@ -795,7 +850,10 @@ def find_jump_targets(self, debug): if inst.has_arg: label = self.fixed_jumps.get(offset) oparg = inst.arg - if self.version >= (3, 6) and self.code[offset] == self.opc.EXTENDED_ARG: + if ( + self.version >= (3, 6) + and self.code[offset] == self.opc.EXTENDED_ARG + ): j = xdis.next_offset(op, self.opc, offset) next_offset = xdis.next_offset(op, self.opc, j) else: @@ -1068,7 +1126,6 @@ def detect_control_flow(self, offset, targets, inst_index): and (target > offset) and pretarget.offset != offset ): - # FIXME: hack upon hack... # In some cases the pretarget can be a jump to the next instruction # and these aren't and/or's either. We limit to 3.5+ since we experienced there @@ -1090,8 +1147,7 @@ def detect_control_flow(self, offset, targets, inst_index): # Is it an "and" inside an "if" or "while" block if op == self.opc.POP_JUMP_IF_FALSE: - - # Search for another POP_JUMP_IF_FALSE targetting the same op, + # Search for another POP_JUMP_IF_FALSE targeting the same op, # in current statement, starting from current offset, and filter # everything inside inner 'or' jumps and midline ifs match = self.rem_or( @@ -1298,7 +1354,7 @@ def detect_control_flow(self, offset, targets, inst_index): self.not_continue.add(pre_rtarget) elif code[pre_rtarget] in rtarget_break: self.structs.append({"type": "if-then", "start": start, "end": rtarget}) - # It is important to distingish if this return is inside some sort + # It is important to distinguish if this return is inside some sort # except block return jump_prev = prev_op[offset] if self.is_pypy and code[jump_prev] == self.opc.COMPARE_OP: @@ -1343,7 +1399,6 @@ def detect_control_flow(self, offset, targets, inst_index): self.fixed_jumps[offset] = rtarget self.not_continue.add(pre_rtarget) else: - # FIXME: this is very convoluted and based on rather hacky # empirical evidence. 
It should go a way when # we have better control-flow analysis diff --git a/uncompyle6/scanners/scanner30.py b/uncompyle6/scanners/scanner30.py index f3e92de49..ea3d3b9be 100644 --- a/uncompyle6/scanners/scanner30.py +++ b/uncompyle6/scanners/scanner30.py @@ -8,15 +8,16 @@ from __future__ import print_function -# bytecode verification, verify(), uses JUMP_OPs from here -from xdis.opcodes import opcode_30 as opc -from xdis import instruction_size import xdis +from xdis import instruction_size -JUMP_TF = frozenset([opc.JUMP_IF_FALSE, opc.JUMP_IF_TRUE]) +# bytecode verification, verify(), uses JUMP_OPs from here +from xdis.opcodes import opcode_30 as opc from uncompyle6.scanners.scanner3 import Scanner3 +JUMP_TF = frozenset([opc.JUMP_IF_FALSE, opc.JUMP_IF_TRUE]) + class Scanner30(Scanner3): def __init__(self, show_asm=None, is_pypy=False): @@ -40,7 +41,7 @@ def detect_control_flow(self, offset, targets, inst_index): start = parent["start"] end = parent["end"] - # Pick inner-most parent for our offset + # Pick innermost parent for our offset for struct in self.structs: current_start = struct["start"] current_end = struct["end"] @@ -193,7 +194,7 @@ def detect_control_flow(self, offset, targets, inst_index): # Is it an "and" inside an "if" or "while" block if op == opc.JUMP_IF_FALSE: - # Search for another JUMP_IF_FALSE targetting the same op, + # Search for another JUMP_IF_FALSE targeting the same op, # in current statement, starting from current offset, and filter # everything inside inner 'or' jumps and midline ifs match = self.rem_or( @@ -348,7 +349,7 @@ def detect_control_flow(self, offset, targets, inst_index): if if_end > start: return - end = self.restrict_to_parent(if_end, parent) + self.restrict_to_parent(if_end, parent) self.structs.append( {"type": "if-then", "start": start, "end": pre_rtarget} @@ -365,20 +366,19 @@ def detect_control_flow(self, offset, targets, inst_index): # 'end': end}) # self.else_start[rtarget] = end elif self.is_jump_back(pre_rtarget, 
0): - if_end = rtarget self.structs.append( {"type": "if-then", "start": start, "end": pre_rtarget} ) self.not_continue.add(pre_rtarget) elif code[pre_rtarget] in (self.opc.RETURN_VALUE, self.opc.BREAK_LOOP): self.structs.append({"type": "if-then", "start": start, "end": rtarget}) - # It is important to distingish if this return is inside some sort + # It is important to distinguish if this return is inside some sort # except block return jump_prev = prev_op[offset] if self.is_pypy and code[jump_prev] == self.opc.COMPARE_OP: if self.opc.cmp_op[code[jump_prev + 1]] == "exception-match": return - if self.version >= 3.5: + if self.version >= (3, 5): # Python 3.5 may remove as dead code a JUMP # instruction after a RETURN_VALUE. So we check # based on seeing SETUP_EXCEPT various places. @@ -399,7 +399,7 @@ def detect_control_flow(self, offset, targets, inst_index): pass pass if code[pre_rtarget] == self.opc.RETURN_VALUE: - if self.version == 3.0: + if self.version == (3, 0): next_op = rtarget if code[next_op] == self.opc.POP_TOP: next_op = rtarget @@ -437,7 +437,7 @@ def detect_control_flow(self, offset, targets, inst_index): self.fixed_jumps[offset] = self.restrict_to_parent(target, parent) pass pass - elif self.version >= 3.5: + elif self.version >= (3, 5): # 3.5+ has Jump optimization which too often causes RETURN_VALUE to get # misclassified as RETURN_END_IF. Handle that here. 
# In RETURN_VALUE, JUMP_ABSOLUTE, RETURN_VALUE is never RETURN_END_IF @@ -483,4 +483,3 @@ def detect_control_flow(self, offset, targets, inst_index): pass else: print("Need to be Python 3.0 to demo; I am version %s" % version_tuple_to_str()) -[w diff --git a/uncompyle6/scanners/scanner33.py b/uncompyle6/scanners/scanner33.py index 1d5d75039..1c4a5aa9c 100644 --- a/uncompyle6/scanners/scanner33.py +++ b/uncompyle6/scanners/scanner33.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2019, 2021-2022 by Rocky Bernstein +# Copyright (c) 2015-2019, 2021-2022, 2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -19,20 +19,22 @@ scanner routine for Python 3. """ -from __future__ import print_function - # bytecode verification, verify(), uses JUMP_OPs from here from xdis.opcodes import opcode_33 as opc -JUMP_OPS = opc.JUMP_OPS from uncompyle6.scanners.scanner3 import Scanner3 -class Scanner33(Scanner3): +JUMP_OPS = opc.JUMP_OPS + + +class Scanner33(Scanner3): def __init__(self, show_asm=False, is_pypy=False): Scanner3.__init__(self, (3, 3), show_asm) return + pass + if __name__ == "__main__": from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str diff --git a/uncompyle6/scanners/scanner35.py b/uncompyle6/scanners/scanner35.py index 9af3aaedc..52a825720 100644 --- a/uncompyle6/scanners/scanner35.py +++ b/uncompyle6/scanners/scanner35.py @@ -1,4 +1,4 @@ -# Copyright (c) 2017, 2021-2022 by Rocky Bernstein +# Copyright (c) 2017, 2021-2022, 2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -22,21 +22,22 @@ scanner routine for Python 3. 
""" -from __future__ import print_function +# bytecode verification, verify(), uses JUMP_OPs from here +from xdis.opcodes import opcode_35 as opc from uncompyle6.scanners.scanner3 import Scanner3 -# bytecode verification, verify(), uses JUMP_OPs from here -from xdis.opcodes import opcode_35 as opc JUMP_OPS = opc.JUMP_OPS -class Scanner35(Scanner3): +class Scanner35(Scanner3): def __init__(self, show_asm=None, is_pypy=False): Scanner3.__init__(self, (3, 5), show_asm, is_pypy) return + pass + if __name__ == "__main__": from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str diff --git a/uncompyle6/scanners/scanner37.py b/uncompyle6/scanners/scanner37.py index 464669c94..af4e8f5ec 100644 --- a/uncompyle6/scanners/scanner37.py +++ b/uncompyle6/scanners/scanner37.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016-2019, 2021-2022 by Rocky Bernstein +# Copyright (c) 2016-2019, 2021-2023 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . """ -Python 3.7 bytecode decompiler scanner +Python 3.7 bytecode decompiler scanner. Does some additional massaging of xdis-disassembled instructions to make things easier for decompilation. 
@@ -24,20 +24,20 @@ from typing import Tuple -from uncompyle6.scanner import CONST_COLLECTIONS -from uncompyle6.scanners.tok import Token -from uncompyle6.scanners.scanner37base import Scanner37Base - # bytecode verification, verify(), uses JUMP_OPs from here from xdis.opcodes import opcode_37 as opc +from uncompyle6.scanner import CONST_COLLECTIONS, Token +from uncompyle6.scanners.scanner37base import Scanner37Base + # bytecode verification, verify(), uses JUMP_OPS from here JUMP_OPs = opc.JUMP_OPS + class Scanner37(Scanner37Base): - def __init__(self, show_asm=None, is_pypy: bool=False): - Scanner37Base.__init__(self, (3, 7), show_asm) - self.is_pypy = is_pypy + def __init__(self, show_asm=None, debug="", is_pypy=False): + Scanner37Base.__init__(self, (3, 7), show_asm, debug, is_pypy) + self.debug = debug return pass @@ -51,9 +51,9 @@ def bound_collection_from_tokens( assert count <= i if collection_type == "CONST_DICT": - # constant dictonaries work via BUILD_CONST_KEY_MAP and + # constant dictionaries work via BUILD_CONST_KEY_MAP and # handle the values() like sets and lists. - # However the keys() are an LOAD_CONST of the keys. + # However, the keys() are an LOAD_CONST of the keys. # adjust offset to account for this count += 1 @@ -90,6 +90,7 @@ def bound_collection_from_tokens( has_arg=True, has_extended_arg=False, opc=self.opc, + optype="pseudo", ) ) for j in range(collection_start, i): @@ -103,6 +104,7 @@ def bound_collection_from_tokens( has_arg=True, has_extended_arg=False, opc=self.opc, + optype=tokens[j].optype, ) ) new_tokens.append( @@ -120,7 +122,7 @@ def bound_collection_from_tokens( return new_tokens def ingest( - self, co, classname=None, code_objects={}, show_asm=None + self, bytecode, classname=None, code_objects={}, show_asm=None ) -> Tuple[list, dict]: """ Create "tokens" the bytecode of an Python code object. Largely these @@ -140,7 +142,9 @@ def ingest( grammar rules. 
Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST cause specific rules for the specific number of arguments they take. """ - tokens, customize = Scanner37Base.ingest(self, co, classname, code_objects, show_asm) + tokens, customize = Scanner37Base.ingest( + self, bytecode, classname, code_objects, show_asm + ) new_tokens = [] for i, t in enumerate(tokens): # things that smash new_tokens like BUILD_LIST have to come first. @@ -180,6 +184,7 @@ def ingest( return new_tokens, customize + if __name__ == "__main__": from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str @@ -192,4 +197,4 @@ def ingest( print(t.format()) pass else: - print("Need to be Python 3.7 to demo; I am version %s" % version_tuple_to_str()) + print(f"Need to be Python 3.7 to demo; I am version {version_tuple_to_str()}.") diff --git a/uncompyle6/scanners/scanner37base.py b/uncompyle6/scanners/scanner37base.py index 238006ed2..dd055fb0f 100644 --- a/uncompyle6/scanners/scanner37base.py +++ b/uncompyle6/scanners/scanner37base.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2020, 2022 by Rocky Bernstein +# Copyright (c) 2015-2020, 2022-2024, 2026 by Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # @@ -15,7 +15,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . """ -Python 37 bytecode scanner/deparser base. +Python 3.7 bytecode scanner/deparser base. Also we *modify* the instruction sequence to assist deparsing code. For example: @@ -29,30 +29,40 @@ Finally we save token information. 
""" -from typing import Any, Dict, List, Set - -from xdis import iscode, instruction_size, Instruction -from xdis.bytecode import _get_const_info +import sys +from typing import Any, Dict, List, Set, Tuple -from uncompyle6.scanner import Token import xdis +from xdis import Instruction, instruction_size, iscode +from xdis.bytecode import _get_const_info # Get all the opcodes into globals -import xdis.opcodes.opcode_37 as op3 +from xdis.opcodes import opcode_37 as op3 -from uncompyle6.scanner import Scanner - -import sys +from uncompyle6.scanner import Scanner, Token globals().update(op3.opmap) +CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT") + + class Scanner37Base(Scanner): - def __init__(self, version, show_asm=None, is_pypy=False): + def __init__( + self, version: Tuple[int, int], show_asm=None, debug="", is_pypy=False + ): super(Scanner37Base, self).__init__(version, show_asm, is_pypy) + self.offset2tok_index = None + self.debug = debug + + # True is code is from PyPy + self.is_pypy = is_pypy + + # Bytecode converted into instruction + self.insts = [] # Create opcode classification sets - # Note: super initilization above initializes self.opc + # Note: super initialization above initializes self.opc # Ops that start SETUP_ ... We will COME_FROM with these names # Some blocks and END_ statements. And they can start @@ -137,7 +147,7 @@ def __init__(self, version, show_asm=None, is_pypy=False): self.opc.POP_JUMP_IF_FALSE, ] ) - # Not really a set, but still clasification-like + # Not really a set, but still classification-like self.statement_opcode_sequences = [ (self.opc.POP_JUMP_IF_FALSE, self.opc.JUMP_FORWARD), (self.opc.POP_JUMP_IF_FALSE, self.opc.JUMP_ABSOLUTE), @@ -182,8 +192,7 @@ def __init__(self, version, show_asm=None, is_pypy=False): return def ingest(self, co, classname=None, code_objects={}, show_asm=None): - """ - Create "tokens" the bytecode of an Python code object. 
Largely these + """Create "tokens" the bytecode of an Python code object. Largely these are the opcode name, but in some cases that has been modified to make parsing easier. returning a list of uncompyle6 Token's. @@ -191,14 +200,18 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): Some transformations are made to assist the deparsing grammar: - various types of LOAD_CONST's are categorized in terms of what they load - COME_FROM instructions are added to assist parsing control structures - - operands with stack argument counts or flag masks are appended to the opcode name, e.g.: - * BUILD_LIST, BUILD_SET - * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments + - operands with stack argument counts or flag masks are appended to the + opcode name, e.g.: + * BUILD_LIST, BUILD_SET + * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional + arguments - EXTENDED_ARGS instructions are removed - Also, when we encounter certain tokens, we add them to a set which will cause custom - grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST - cause specific rules for the specific number of arguments they take. + Also, when we encounter certain tokens, we add them to a set + which will cause custom grammar rules. Specifically, variable + arg tokens like MAKE_FUNCTION or BUILD_LIST cause specific + rules for the specific number of arguments they take. 
+ """ def tokens_append(j, token): @@ -213,10 +226,16 @@ def tokens_append(j, token): bytecode = self.build_instructions(co) - # show_asm = 'both' if show_asm in ("both", "before"): - for instr in bytecode.get_instructions(co): - print(instr.disassemble(self.opc)) + print("\n# ---- disassembly:") + bytecode.disassemble_bytes( + co.co_code, + line_starts=bytecode._linestarts, + asm_format="extended", + filename=co.co_filename, + show_source=True, + first_line_number=co.co_firstlineno, + ) # "customize" is in the process of going away here customize = {} @@ -235,7 +254,6 @@ def tokens_append(j, token): n = len(self.insts) for i, inst in enumerate(self.insts): - # We need to detect the difference between: # raise AssertionError # and @@ -249,10 +267,9 @@ def tokens_append(j, token): if ( next_inst.opname == "LOAD_GLOBAL" and next_inst.argval == "AssertionError" - and inst.argval + and inst.argval is not None ): - raise_idx = self.offset2inst_index[self.prev_op[inst.argval]] - raise_inst = self.insts[raise_idx] + raise_inst = self.get_inst(self.prev_op[inst.argval]) if raise_inst.opname.startswith("RAISE_VARARGS"): self.load_asserts.add(next_inst.offset) pass @@ -265,30 +282,33 @@ def tokens_append(j, token): # To simplify things we want to untangle this. We also # do this loop before we compute jump targets. for i, inst in enumerate(self.insts): - # One artifact of the "too-small" operand problem, is that # some backward jumps, are turned into forward jumps to another # "extended arg" backward jump to the same location. if inst.opname == "JUMP_FORWARD": - jump_inst = self.insts[self.offset2inst_index[inst.argval]] + jump_inst = self.get_inst(inst.argval) if jump_inst.has_extended_arg and jump_inst.opname.startswith("JUMP"): - # Create comination of the jump-to instruction and + # Create a combination of the jump-to instruction and # this one. 
Keep the position information of this instruction, # but the operator and operand properties come from the other # instruction self.insts[i] = Instruction( - jump_inst.opname, - jump_inst.opcode, - jump_inst.optype, - jump_inst.inst_size, - jump_inst.arg, - jump_inst.argval, - jump_inst.argrepr, - jump_inst.has_arg, - inst.offset, - inst.starts_line, - inst.is_jump_target, - inst.has_extended_arg, + opcode=jump_inst.opcode, + opname=jump_inst.opname, + arg=jump_inst.arg, + argval=jump_inst.argval, + argrepr=jump_inst.argrepr, + offset=inst.offset, + starts_line=inst.starts_line, + is_jump_target=inst.is_jump_target, + positions=None, + optype=jump_inst.optype, + has_arg=jump_inst.has_arg, + inst_size=jump_inst.inst_size, + has_extended_arg=inst.has_extended_arg, + fallthrough=False, + tos_str=None, + start_offset=None, ) # Get jump targets @@ -300,16 +320,9 @@ def tokens_append(j, token): j = 0 for i, inst in enumerate(self.insts): - argval = inst.argval op = inst.opcode - if inst.opname == "EXTENDED_ARG": - # FIXME: The EXTENDED_ARG is used to signal annotation - # parameters - if i + 1 < n and self.insts[i + 1].opcode != self.opc.MAKE_FUNCTION: - continue - if inst.offset in jump_targets: jump_idx = 0 # We want to process COME_FROMs to the same offset to be in *descending* @@ -335,13 +348,14 @@ def tokens_append(j, token): j = tokens_append( j, Token( - come_from_name, - jump_offset, - repr(jump_offset), + opname=come_from_name, + attr=jump_offset, + pattr=repr(jump_offset), offset="%s_%s" % (inst.offset, jump_idx), has_arg=True, opc=self.opc, has_extended_arg=False, + optype=inst.optype, ), ) jump_idx += 1 @@ -387,6 +401,11 @@ def tokens_append(j, token): if "." 
in inst.argval: opname = "IMPORT_NAME_ATTR" pass + + elif opname == "LOAD_FAST" and argval == ".0": + # Used as the parameter of a list expression + opname = "LOAD_ARG" + elif opname in ("MAKE_FUNCTION", "MAKE_CLOSURE"): flags = argval opname = "MAKE_FUNCTION_%d" % (flags) @@ -408,6 +427,7 @@ def tokens_append(j, token): has_arg=inst.has_arg, opc=self.opc, has_extended_arg=inst.has_extended_arg, + optype=inst.optype, ), ) continue @@ -435,9 +455,9 @@ def tokens_append(j, token): elif op == self.opc.JUMP_ABSOLUTE: # Refine JUMP_ABSOLUTE further in into: # - # * "JUMP_LOOP" - which are are used in loops. This is sometimes + # * "JUMP_LOOP" - which are used in loops. This is sometimes # found at the end of a looping construct - # * "BREAK_LOOP" - which are are used to break loops. + # * "BREAK_LOOP" - which are used to break loops. # * "CONTINUE" - jumps which may appear in a "continue" statement. # It is okay to confuse this with JUMP_LOOP. The # grammar should tolerate this. @@ -457,12 +477,17 @@ def tokens_append(j, token): next_opname = self.insts[i + 1].opname # 'Continue's include jumps to loops that are not - # and the end of a block which follow with POP_BLOCK and COME_FROM_LOOP. - # If the JUMP_ABSOLUTE is to a FOR_ITER and it is followed by another JUMP_FORWARD - # then we'll take it as a "continue". - is_continue = ( - self.insts[self.offset2inst_index[target]].opname == "FOR_ITER" - and self.insts[i + 1].opname == "JUMP_FORWARD" + # and the end of a block which follow with + # POP_BLOCK and COME_FROM_LOOP. If the + # JUMP_ABSOLUTE is to a FOR_ITER, and it is + # followed by another JUMP_FORWARD then we'll take + # it as a "continue". 
+ next_inst = self.insts[i + 1] + is_continue = self.insts[ + self.offset2inst_index[target] + ].opname == "FOR_ITER" and next_inst.opname in ( + "JUMP_FORWARD", + "JUMP_ABSOLUTE", ) if self.version < (3, 8) and ( @@ -477,21 +502,65 @@ def tokens_append(j, token): ): opname = "CONTINUE" else: + # "continue" versus "break_loop" dectction is more complicated + # because "continue" to an outer loop is really a "break loop" opname = "JUMP_BACK" + # FIXME: this is a hack to catch stuff like: # if x: continue # the "continue" is not on a new line. - # There are other situations where we don't catch - # CONTINUE as well. - if tokens[-1].kind == "JUMP_BACK" and tokens[-1].attr <= argval: + # + # Another situation is where we have + # for method in methods: + # for B in method: + # if c: + # return + # break # A "continue" but not the innermost one + if tokens[-1].kind == "JUMP_LOOP" and tokens[-1].attr <= argval: if tokens[-2].kind == "BREAK_LOOP": del tokens[-1] + j -= 1 else: - # intern is used because we are changing the *previous* token - tokens[-1].kind = sys.intern("CONTINUE") - if last_op_was_break and opname == "CONTINUE": - last_op_was_break = False - continue + # "intern" is used because we are + # changing the *previous* token. A + # POP_TOP suggests a "break" rather + # than a "continue"? + if tokens[-2] == "POP_TOP" and ( + is_continue and next_inst.argval != tokens[-1].attr + ): + tokens[-1].kind = sys.intern("BREAK_LOOP") + else: + tokens[-1].kind = sys.intern("CONTINUE") + last_continue = tokens[-1] + pass + pass + pass + # elif ( + # last_continue is not None + # and tokens[-1].kind == "JUMP_LOOP" + # and last_continue.attr <= tokens[-1].attr + # and last_continue.offset > tokens[-1].attr + # ): + # # Handle mis-characterized "CONTINUE" + # # We have a situation like: + # # loop ... 
for or while) + # # loop + # # if ...: # code below starts here + # # break # not continue + # # + # # POP_JUMP_IF_FALSE_LOOP # to outer loop + # # JUMP_LOOP # to inner loop + # # ... + # # JUMP_LOOP # to outer loop + # tokens[-2].kind = sys.intern("BREAK_LOOP") + # pass + + # if last_op_was_break and opname == "CONTINUE": + # last_op_was_break = False + # continue + pass + else: + opname = "JUMP_FORWARD" elif inst.offset in self.load_asserts: opname = "LOAD_ASSERT" @@ -509,12 +578,15 @@ def tokens_append(j, token): has_arg=inst.has_arg, opc=self.opc, has_extended_arg=inst.has_extended_arg, + optype=inst.optype, ), ) pass - if show_asm in ("both", "after"): - for t in tokens: + if show_asm in ("both", "after") and self.version < (3, 8): + print("\n# ---- tokenization:") + # FIXME: t.format() is changing tokens! + for t in tokens.copy(): print(t.format(line_prefix="")) print() return tokens, customize @@ -883,16 +955,6 @@ def detect_control_flow( pass return - def is_jump_back(self, offset, extended_arg): - """ - Return True if the code at offset is some sort of jump back. - That is, it is ether "JUMP_FORWARD" or an absolute jump that - goes forward. - """ - if self.code[offset] != self.opc.JUMP_ABSOLUTE: - return False - return offset > self.get_target(offset, extended_arg) - def next_except_jump(self, start): """ Return the next jump that was generated by an except SomeException: @@ -932,7 +994,7 @@ def next_except_jump(self, start): if __name__ == "__main__": from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str - if PYTHON_VERSION_TRIPLE[:2] == (3, 7): + if (3, 7) <= PYTHON_VERSION_TRIPLE[:2] < (3, 9): import inspect co = inspect.currentframe().f_code # type: ignore @@ -941,5 +1003,8 @@ def next_except_jump(self, start): for t in tokens: print(t) else: - print(f"Need to be Python 3.7 to demo; I am version {version_tuple_to_str()}.") + print( + "Need to be Python 3.7..3.8 to demo; " + f"I am version {version_tuple_to_str()}." 
+ ) pass diff --git a/uncompyle6/scanners/scanner38.py b/uncompyle6/scanners/scanner38.py index 0aad8cd75..a5a0410db 100644 --- a/uncompyle6/scanners/scanner38.py +++ b/uncompyle6/scanners/scanner38.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022 by Rocky Bernstein +# Copyright (c) 2019-2022, 2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -22,26 +22,30 @@ scanner routine for Python 3.7 and up. """ -from uncompyle6.scanners.tok import off2int -from uncompyle6.scanners.scanner37 import Scanner37 -from uncompyle6.scanners.scanner37base import Scanner37Base +from typing import Dict, Tuple # bytecode verification, verify(), uses JUMP_OPs from here from xdis.opcodes import opcode_38 as opc +from uncompyle6.scanners.scanner37 import Scanner37 +from uncompyle6.scanners.scanner37base import Scanner37Base +from uncompyle6.scanners.tok import off2int + # bytecode verification, verify(), uses JUMP_OPS from here JUMP_OPs = opc.JUMP_OPS class Scanner38(Scanner37): - def __init__(self, show_asm=None): - Scanner37Base.__init__(self, (3, 8), show_asm) - self.debug = False + def __init__(self, show_asm=None, debug="", is_pypy=False): + Scanner37Base.__init__(self, (3, 8), show_asm, debug, is_pypy) + self.debug = debug return pass - def ingest(self, co, classname=None, code_objects={}, show_asm=None): + def ingest( + self, bytecode, classname=None, code_objects={}, show_asm=None + ) -> Tuple[list, dict]: """ Create "tokens" the bytecode of an Python code object. Largely these are the opcode name, but in some cases that has been modified to make parsing @@ -61,7 +65,7 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): cause specific rules for the specific number of arguments they take. 
""" tokens, customize = super(Scanner38, self).ingest( - co, classname, code_objects, show_asm + bytecode, classname, code_objects, show_asm ) # Hacky way to detect loop ranges. @@ -69,7 +73,7 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): # The value is where the loop ends. In current Python, # JUMP_BACKS are always to loops. And blocks are ordered so that the # JUMP_BACK with the highest offset will be where the range ends. - jump_back_targets = {} + jump_back_targets: Dict[int, int] = {} for token in tokens: if token.kind == "JUMP_BACK": jump_back_targets[token.attr] = token.offset @@ -88,7 +92,7 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): if offset == next_end: loop_ends.pop() if self.debug: - print("%sremove loop offset %s" % (" " * len(loop_ends), offset)) + print(f"{' ' * len(loop_ends)}remove loop offset {offset}") pass next_end = ( loop_ends[-1] @@ -102,13 +106,12 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): next_end = off2int(jump_back_targets[offset], prefer_last=False) if self.debug: print( - "%sadding loop offset %s ending at %s" - % (" " * len(loop_ends), offset, next_end) + f"{' ' * len(loop_ends)}adding loop offset {offset} ending at {next_end}" ) loop_ends.append(next_end) # Turn JUMP opcodes into "BREAK_LOOP" opcodes. - # FIXME: this should be replaced by proper control flow. + # FIXME!!!!: this should be replaced by proper control flow. if opname in ("JUMP_FORWARD", "JUMP_ABSOLUTE") and len(loop_ends): jump_target = token.attr @@ -118,35 +121,26 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): new_tokens.append(token) continue - # We also want to avoid confusing BREAK_LOOPS with parts of the - # grammar rules for loops. (Perhaps we should change the grammar.) - # Try to find an adjacent JUMP_BACK which is part of the normal loop end. 
- - if i + 1 < len(tokens) and tokens[i + 1] == "JUMP_BACK": - # Sometimes the jump back is after the "break" instruction.. - jump_back_index = i + 1 - else: - # and sometimes, because of jump-to-jump optimization, it is before the - # jump target instruction. - jump_back_index = self.offset2tok_index[jump_target] - 1 - while tokens[jump_back_index].kind.startswith("COME_FROM_"): - jump_back_index -= 1 - pass - pass - jump_back_token = tokens[jump_back_index] - - # Is this a forward jump not next to a JUMP_BACK ? ... - break_loop = token.linestart and jump_back_token != "JUMP_BACK" + j = i + while tokens[j - 1] in ("POP_TOP", "POP_BLOCK", "POP_EXCEPT"): + j -= 1 + if tokens[j].linestart: + break + token_with_linestart = tokens[j] - # or if there is looping jump back, then that loop - # should start before where the "break" instruction sits. - if break_loop or ( - jump_back_token == "JUMP_BACK" - and jump_back_token.attr < token.off2int() - ): + if token_with_linestart.linestart: token.kind = "BREAK_LOOP" + pass new_tokens.append(token) + + if show_asm in ("both", "after"): + print("\n# ---- tokenization:") + # FIXME: t.format() is changing tokens! 
+ for t in new_tokens.copy(): + print(t.format(line_prefix="")) + print() + return new_tokens, customize @@ -162,4 +156,4 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None): print(t.format()) pass else: - print("Need to be Python 3.8 to demo; I am version %s" % version_tuple_to_str()) + print(f"Need to be Python 3.8 to demo; I am version {version_tuple_to_str()}.") diff --git a/uncompyle6/scanners/tok.py b/uncompyle6/scanners/tok.py index f98b63064..01d4849c2 100644 --- a/uncompyle6/scanners/tok.py +++ b/uncompyle6/scanners/tok.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016-2021 by Rocky Bernstein +# Copyright (c) 2016-2021, 2023-2025 by Rocky Bernstein # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock # @@ -15,7 +15,9 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -import re, sys +import re +import sys +from typing import Optional, Union intern = sys.intern @@ -59,17 +61,19 @@ def __init__( opname, attr=None, pattr=None, - offset=-1, + offset: Union[int, str] = -1, linestart=None, op=None, has_arg=None, opc=None, has_extended_arg=False, + optype=None, ): self.kind = intern(opname) self.has_arg = has_arg - self.attr = attr + self.attr: Optional[int] = attr self.pattr = pattr + self.optype = optype if has_extended_arg: self.offset = "%d_%d" % (offset, offset + 2) else: @@ -87,7 +91,7 @@ def __init__( print(f"I don't know about Python version {e} yet.") try: version_tuple = tuple(int(i) for i in str(e)[1:-1].split(".")) - except: + except Exception: pass else: if version_tuple > (3, 9): @@ -105,8 +109,8 @@ def __init__( self.op = op def __eq__(self, o): - """ '==' on kind and "pattr" attributes. - It is okay if offsets and linestarts are different""" + """'==' on kind and "pattr" attributes. 
+ It is okay if offsets and linestarts are different""" if isinstance(o, Token): return (self.kind == o.kind) and ( (self.pattr == o.pattr) or self.attr == o.attr @@ -116,7 +120,7 @@ def __eq__(self, o): return self.kind == o def __ne__(self, o): - """ '!=', but it's okay if offsets and linestarts are different""" + """'!=', but it's okay if offsets and linestarts are different""" return not self.__eq__(o) def __repr__(self): @@ -179,7 +183,7 @@ def format(self, line_prefix="", token_num=None): elif name == "LOAD_ASSERT": return "%s%s %s" % (prefix, offset_opname, pattr) elif self.op in self.opc.NAME_OPS: - if self.opc.version >= 3.0: + if self.opc.version_tuple >= (3, 0): return "%s%s%s %s" % (prefix, offset_opname, argstr, self.attr) elif name == "EXTENDED_ARG": return "%s%s%s 0x%x << %s = %s" % ( diff --git a/uncompyle6/semantics/aligner.py b/uncompyle6/semantics/aligner.py index 1cc68a565..474c1b7c3 100644 --- a/uncompyle6/semantics/aligner.py +++ b/uncompyle6/semantics/aligner.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018, 2022 by Rocky Bernstein +# Copyright (c) 2018, 2022-2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,41 +14,67 @@ # along with this program. If not, see . 
import sys -from uncompyle6.semantics.pysource import ( - SourceWalker, SourceWalkerError, find_globals, ASSIGN_DOC_STRING, RETURN_NONE) from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG -from uncompyle6 import IS_PYPY +from xdis import iscode + +from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE + +from uncompyle6.scanner import get_scanner +from uncompyle6.semantics.consts import ASSIGN_DOC_STRING + +from uncompyle6.semantics.pysource import ( + RETURN_NONE, + TREE_DEFAULT_DEBUG, + SourceWalker, + SourceWalkerError, + find_globals_and_nonlocals +) +from uncompyle6.show import maybe_show_asm + +# + class AligningWalker(SourceWalker, object): - def __init__(self, version, out, scanner, showast=False, - debug_parser=PARSER_DEFAULT_DEBUG, - compile_mode='exec', is_pypy=False): - SourceWalker.__init__(self, version, out, scanner, showast, debug_parser, - compile_mode, is_pypy) + def __init__( + self, + version, + out, + scanner, + showast=TREE_DEFAULT_DEBUG, + debug_parser=PARSER_DEFAULT_DEBUG, + compile_mode="exec", + is_pypy=False, + ): + SourceWalker.__init__( + self, version, out, scanner, showast, debug_parser, compile_mode, is_pypy + ) self.desired_line_number = 0 self.current_line_number = 0 + self.showast = showast def println(self, *data): - if data and not(len(data) == 1 and data[0] == ''): + if data and not (len(data) == 1 and data[0] == ""): self.write(*data) self.pending_newlines = max(self.pending_newlines, 1) def write(self, *data): if (len(data) == 1) and data[0] == self.indent: - diff = max(self.pending_newlines, - self.desired_line_number - self.current_line_number) - self.f.write('\n'*diff) + diff = max( + self.pending_newlines, + self.desired_line_number - self.current_line_number, + ) + self.f.write("\n" * diff) self.current_line_number += diff self.pending_newlines = 0 - if (len(data) == 0) or (len(data) == 1 and data[0] == ''): + if (len(data) == 0) or (len(data) == 1 and data[0] == ""): return - out = ''.join((str(j) for 
j in data)) + out = "".join((str(j) for j in data)) n = 0 for i in out: - if i == '\n': + if i == "\n": n += 1 if n == len(out): self.pending_newlines = max(self.pending_newlines, n) @@ -61,25 +87,27 @@ def write(self, *data): break if self.pending_newlines > 0: - diff = max(self.pending_newlines, - self.desired_line_number - self.current_line_number) - self.f.write('\n'*diff) + diff = max( + self.pending_newlines, + self.desired_line_number - self.current_line_number, + ) + self.f.write("\n" * diff) self.current_line_number += diff self.pending_newlines = 0 for i in out[::-1]: - if i == '\n': + if i == "\n": self.pending_newlines += 1 else: break if self.pending_newlines: - out = out[:-self.pending_newlines] + out = out[: -self.pending_newlines] self.f.write(out) def default(self, node): mapping = self._get_mapping(node) - if hasattr(node, 'linestart'): + if hasattr(node, "linestart"): if node.linestart: self.desired_line_number = node.linestart table = mapping[0] @@ -89,26 +117,23 @@ def default(self, node): key = key[i] pass - if key.type in table: - self.engine(table[key.type], node) + if key.kind in table: + self.template_engine(table[key.kind], node) self.prune() -from xdis import iscode -from uncompyle6.scanner import get_scanner -from uncompyle6.show import ( - maybe_show_asm, -) -# -DEFAULT_DEBUG_OPTS = { - 'asm': False, - 'tree': False, - 'grammar': False -} - -def code_deparse_align(co, out=sys.stderr, version=None, is_pypy=None, - debug_opts=DEFAULT_DEBUG_OPTS, - code_objects={}, compile_mode='exec'): +DEFAULT_DEBUG_OPTS = {"asm": False, "tree": TREE_DEFAULT_DEBUG, "grammar": False} + + +def code_deparse_align( + co, + out=sys.stderr, + version=None, + is_pypy=None, + debug_opts=DEFAULT_DEBUG_OPTS, + code_objects={}, + compile_mode="exec", +): """ ingests and deparses a given code block 'co' """ @@ -116,65 +141,77 @@ def code_deparse_align(co, out=sys.stderr, version=None, is_pypy=None, assert iscode(co) if version is None: - version = 
float(sys.version[0:3]) + version = PYTHON_VERSION_TRIPLE if is_pypy is None: is_pypy = IS_PYPY - # store final output stream for case of error scanner = get_scanner(version, is_pypy=is_pypy) tokens, customize = scanner.ingest(co, code_objects=code_objects) - show_asm = debug_opts.get('asm', None) + show_asm = debug_opts.get("asm", None) maybe_show_asm(show_asm, tokens) debug_parser = dict(PARSER_DEFAULT_DEBUG) - show_grammar = debug_opts.get('grammar', None) - show_grammar = debug_opts.get('grammar', None) + show_grammar = debug_opts.get("grammar", None) + show_grammar = debug_opts.get("grammar", None) if show_grammar: - debug_parser['reduce'] = show_grammar - debug_parser['errorstack'] = True + debug_parser["reduce"] = show_grammar + debug_parser["errorstack"] = True # Build a parse tree from tokenized and massaged disassembly. - show_ast = debug_opts.get('ast', None) - deparsed = AligningWalker(version, scanner, out, showast=show_ast, - debug_parser=debug_parser, compile_mode=compile_mode, - is_pypy = is_pypy) - - is_top_level_module = co.co_name == '' - deparsed.ast = deparsed.build_ast(tokens, customize, co, is_top_level_module=is_top_level_module) - - assert deparsed.ast == 'stmts', 'Should have parsed grammar start' - - del tokens # save memory - - deparsed.mod_globs = find_globals(deparsed.ast, set()) + show_ast = debug_opts.get("ast", TREE_DEFAULT_DEBUG) + deparsed = AligningWalker( + version, + out, + scanner, + showast=show_ast, + debug_parser=debug_parser, + compile_mode=compile_mode, + is_pypy=is_pypy, + ) + + is_top_level_module = co.co_name == "" + deparsed.ast = deparsed.build_ast( + tokens, customize, co, is_top_level_module=is_top_level_module + ) + + assert deparsed.ast == "stmts", "Should have parsed grammar start" + + del tokens # save memory + + (deparsed.mod_globs, _) = find_globals_and_nonlocals( + deparsed.ast, set(), set(), co, version + ) # convert leading '__doc__ = "..." 
into doc string try: if deparsed.ast[0][0] == ASSIGN_DOC_STRING(co.co_consts[0]): - deparsed.print_docstring('', co.co_consts[0]) + deparsed.print_docstring("", co.co_consts[0]) del deparsed.ast[0] if deparsed.ast[-1] == RETURN_NONE: - deparsed.ast.pop() # remove last node + deparsed.ast.pop() # remove last node # todo: if empty, add 'pass' - except: + except Exception: pass # What we've been waiting for: Generate Python source from the parse tree! deparsed.gen_source(deparsed.ast, co.co_name, customize) for g in sorted(deparsed.mod_globs): - deparsed.write('# global %s ## Warning: Unused global\n' % g) + deparsed.write("# global %s ## Warning: Unused global\n" % g) if deparsed.ERROR: raise SourceWalkerError("Deparsing stopped due to parse error") return deparsed -if __name__ == '__main__': + +if __name__ == "__main__": + def deparse_test(co): "This is a docstring" deparsed = code_deparse_align(co) print(deparsed.text) return - deparse_test(deparse_test.__code__) + + deparse_test(deparse_test.func_code) diff --git a/uncompyle6/semantics/check_ast.py b/uncompyle6/semantics/check_ast.py index 203ff0a96..1f0f0c7b1 100644 --- a/uncompyle6/semantics/check_ast.py +++ b/uncompyle6/semantics/check_ast.py @@ -21,7 +21,7 @@ def checker(ast, in_loop, errors): if ast.kind in ("aug_assign1", "aug_assign2") and ast[0][0] == "and": text = str(ast) error_text = ( - "\n# improper augmented assigment (e.g. +=, *=, ...):\n#\t" + "\n# improper augmented assignment (e.g. 
+=, *=, ...):\n#\t" + "\n# ".join(text.split("\n")) + "\n" ) diff --git a/uncompyle6/semantics/consts.py b/uncompyle6/semantics/consts.py index e81cae1b3..b7e12d4f7 100644 --- a/uncompyle6/semantics/consts.py +++ b/uncompyle6/semantics/consts.py @@ -1,4 +1,4 @@ -# Copyright (c) 2017-2022 by Rocky Bernstein +# Copyright (c) 2017-2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,15 +14,17 @@ # along with this program. If not, see . """Constants and initial table values used in pysource.py and fragments.py""" -import re, sys +import re +import sys + from uncompyle6.parsers.treenode import SyntaxTree -from uncompyle6.scanners.tok import Token, NoneToken +from uncompyle6.scanners.tok import NoneToken, Token minint = -sys.maxsize - 1 maxint = sys.maxsize -# Operator precidence See +# Operator precedence See # https://docs.python.org/2/reference/expressions.html#operator-precedence # or # https://docs.python.org/3/reference/expressions.html#operator-precedence @@ -37,27 +39,31 @@ # various templates we use odd values. Avoiding equal-precedent comparisons # avoids ambiguity what to do when the precedence is equal. -# The precidence of a key below applies the key, a node, and the its -# *parent*. A node however sometimes sets the precidence for its -# children. For example, "call" has precidence 2 so we don't get +# The precedence of a key below applies the key, a node, and the its +# *parent*. A node however sometimes sets the precedence for its +# children. For example, "call" has precedence 2 so we don't get # additional the additional parenthesis of: ".. op (call())". 
However -# for call's children, it parameters, we set the the precidence high, -# say to 100, to make sure we avoid additional prenthesis in +# for call's children, it parameters, we set the the precedence high, +# say to 100, to make sure we avoid additional parenthesis in # call((.. op ..)). NO_PARENTHESIS_EVER = 100 +PARENTHESIS_ALWAYS = -2 # fmt: off PRECEDENCE = { "named_expr": 40, # := - "yield": 38, # Needs to be below named_expr - "yield_from": 38, - "tuple_list_starred": 38, # *x, *y, *z - about at the level of yield? "dict_unpack": 38, # **kwargs "list_unpack": 38, # *args + "yield_from": 38, + "tuple_list_starred": 38, # *x, *y, *z - about at the level of yield? + "unpack": 38, # A guess. Used in "async with ... as ... + # This might also get used in tuple assignment? "_lambda_body": 30, - "lambda_body": 30, # lambda ... : lambda_body + "lambda_body": 32, # lambda ... : lambda_body + + "yield": 30, # Needs to be below named_expr and lambda_body "if_exp": 28, # IfExp ( a if x else b) "if_exp_lambda": 28, # IfExp involving a lambda expression @@ -126,13 +132,13 @@ # Some parse trees created below are used for comparing code # fragments (like "return None" at the end of functions). 
-ASSIGN_DOC_STRING = lambda doc_string, doc_load: SyntaxTree( +ASSIGN_DOC_STRING = lambda doc_string, doc_load: SyntaxTree( # noqa "assign", [ SyntaxTree( "expr", [Token(doc_load, pattr=doc_string, attr=doc_string)] ), - SyntaxTree("store", [Token("STORE_NAME", pattr="__doc__")]), + SyntaxTree("store", [Token("STORE_NAME", pattr="__doc__", optype="name")]), ], ) @@ -144,14 +150,15 @@ "assign", [ SyntaxTree( - "expr", [Token("LOAD_NAME", pattr="__name__", offset=0, has_arg=True)] + "expr", [Token("LOAD_NAME", pattr="__name__", offset=0, has_arg=True, optype="name")] ), SyntaxTree( - "store", [Token("STORE_NAME", pattr="__module__", offset=3, has_arg=True)] + "store", [Token("STORE_NAME", pattr="__module__", offset=3, has_arg=True, optype="name")] ), ], ) +NEWLINE = SyntaxTree("newline", []) NONE = SyntaxTree("expr", [NoneToken]) RETURN_NONE = SyntaxTree("stmt", [SyntaxTree("return", [NONE, Token("RETURN_VALUE")])]) @@ -185,205 +192,123 @@ # } TABLE_DIRECT = { - "BINARY_ADD": ("+",), - "BINARY_SUBTRACT": ("-",), - "BINARY_MULTIPLY": ("*",), - "BINARY_DIVIDE": ("/",), - "BINARY_MATRIX_MULTIPLY": ("@",), - "BINARY_TRUE_DIVIDE": ("/",), # Not in <= 2.1 - "BINARY_FLOOR_DIVIDE": ("//",), - "BINARY_MODULO": ("%%",), - "BINARY_POWER": ("**",), - "BINARY_LSHIFT": ("<<",), - "BINARY_RSHIFT": (">>",), - "BINARY_AND": ("&",), - "BINARY_OR": ("|",), - "BINARY_XOR": ("^",), - "INPLACE_ADD": ("+=",), - "INPLACE_SUBTRACT": ("-=",), - "INPLACE_MULTIPLY": ("*=",), - "INPLACE_MATRIX_MULTIPLY": ("@=",), - "INPLACE_DIVIDE": ("/=",), - "INPLACE_TRUE_DIVIDE": ("/=",), # Not in <= 2.1; 2.6 generates INPLACE_DIVIDE only? 
- "INPLACE_FLOOR_DIVIDE": ("//=",), - "INPLACE_MODULO": ("%%=",), - "INPLACE_POWER": ("**=",), - "INPLACE_LSHIFT": ("<<=",), - "INPLACE_RSHIFT": (">>=",), - "INPLACE_AND": ("&=",), - "INPLACE_OR": ("|=",), - "INPLACE_XOR": ("^=",), - # bin_op (formerly "binary_expr") is the Python AST BinOp - "bin_op": ("%c %c %c", 0, (-1, "binary_operator"), (1, "expr")), - "UNARY_POSITIVE": ("+",), - "UNARY_NEGATIVE": ("-",), - "UNARY_INVERT": ("~"), - # unary_op (formerly "unary_expr") is the Python AST UnaryOp - "unary_op": ("%c%c", (1, "unary_operator"), (0, "expr")), - "unary_not": ("not %c", (0, "expr")), - "unary_convert": ("`%c`", (0, "expr"),), - "get_iter": ("iter(%c)", (0, "expr"),), - "slice0": ("%c[:]", (0, "expr"),), - "slice1": ("%c[%p:]", (0, "expr"), (1, 100)), - "slice2": ("%c[:%p]", (0, "expr"), (1, 100)), - "slice3": ("%c[%p:%p]", (0, "expr"), (1, 100), (2, 100)), - "IMPORT_FROM": ("%{pattr}",), - "IMPORT_NAME_ATTR": ("%{pattr}",), - "attribute": ("%c.%[1]{pattr}", (0, "expr")), - "LOAD_STR": ("%{pattr}",), - "LOAD_FAST": ("%{pattr}",), - "LOAD_NAME": ("%{pattr}",), - "LOAD_CLASSNAME": ("%{pattr}",), - "LOAD_GLOBAL": ("%{pattr}",), - "LOAD_DEREF": ("%{pattr}",), - "LOAD_LOCALS": ("locals()",), - "LOAD_ASSERT": ("%{pattr}",), - "DELETE_FAST": ("%|del %{pattr}\n",), - "DELETE_NAME": ("%|del %{pattr}\n",), - "DELETE_GLOBAL": ("%|del %{pattr}\n",), - "delete_subscript": ( - "%|del %p[%c]\n", - (0, "expr", PRECEDENCE["subscript"]), - (1, "expr"), - ), + "BINARY_ADD": ( "+" ,), + "BINARY_AND": ( "&" ,), + "BINARY_DIVIDE": ( "/" ,), + "BINARY_FLOOR_DIVIDE": ( "//" ,), + "BINARY_LSHIFT": ( "<<",), + "BINARY_MATRIX_MULTIPLY": ( "@" ,), + "BINARY_MODULO": ( "%%",), + "BINARY_MULTIPLY": ( "*" ,), + "BINARY_OR": ( "|" ,), + "BINARY_POWER": ( "**",), + "BINARY_RSHIFT": ( ">>",), + "BINARY_SUBTRACT": ( "-" ,), + "BINARY_TRUE_DIVIDE": ( "/" ,), # Not in <= 2.1; 2.6 generates INPLACE_DIVIDE only? 
+ "BINARY_XOR": ( "^" ,), + "DELETE_FAST": ( "%|del %{pattr}\n", ), + "DELETE_GLOBAL": ( "%|del %{pattr}\n", ), + "DELETE_NAME": ( "%|del %{pattr}\n", ), + "IMPORT_FROM": ( "%{pattr}", ), + "IMPORT_NAME_ATTR": ( "%{pattr}", ), + "INPLACE_ADD": ( "+=" ,), + "INPLACE_AND": ( "&=" ,), + "INPLACE_DIVIDE": ( "/=" ,), + "INPLACE_FLOOR_DIVIDE": ( "//=" ,), + "INPLACE_LSHIFT": ( "<<=",), + "INPLACE_MATRIX_MULTIPLY": ( "@=" ,), + "INPLACE_MODULO": ( "%%=",), + "INPLACE_MULTIPLY": ( "*=" ,), + "INPLACE_OR": ( "|=" ,), + "INPLACE_POWER": ( "**=",), + "INPLACE_RSHIFT": ( ">>=",), + "INPLACE_SUBTRACT": ( "-=" ,), + "INPLACE_TRUE_DIVIDE": ( "/=" ,), + "INPLACE_XOR": ( "^=" ,), + "LOAD_ARG": ( "%{pattr}", ), + "LOAD_ASSERT": ( "%{pattr}", ), + "LOAD_CLASSNAME": ( "%{pattr}", ), + "LOAD_DEREF": ( "%{pattr}", ), + "LOAD_FAST": ( "%{pattr}", ), + "LOAD_GLOBAL": ( "%{pattr}", ), + "LOAD_LOCALS": ( "locals()", ), + "LOAD_NAME": ( "%{pattr}", ), + "LOAD_STR": ( "%{pattr}", ), + "STORE_DEREF": ( "%{pattr}", ), + "STORE_FAST": ( "%{pattr}", ), + "STORE_GLOBAL": ( "%{pattr}", ), + "STORE_NAME": ( "%{pattr}", ), + "UNARY_INVERT": ( "~"), + "UNARY_NEGATIVE": ( "-",), + "UNARY_NOT": ( "not ", ), + "UNARY_POSITIVE": ( "+",), - "subscript": ( - "%p[%p]", - (0, "expr", PRECEDENCE["subscript"]), - (1, "expr", NO_PARENTHESIS_EVER) - ), + "and": ("%c and %c", 0, 2), + "and2": ("%c", 3), - "subscript2": ( - "%p[%p]", - (0, "expr", PRECEDENCE["subscript"]), - (1, "expr", NO_PARENTHESIS_EVER) - ), + "assert_expr_or": ("%c or %c", 0, 2), + "assert_expr_and": ("%c and %c", 0, 2), + + "assign": ( + "%|%c = %p\n", + -1, + (0, ("expr", "branch_op"), PRECEDENCE["tuple_list_starred"] + 1) + ), + + "attribute": ("%c.%[1]{pattr}", (0, "expr")), - "store_subscript": ("%p[%c]", (0, "expr", PRECEDENCE["subscript"]), (1, "expr")), - "STORE_FAST": ("%{pattr}",), - "STORE_NAME": ("%{pattr}",), - "STORE_GLOBAL": ("%{pattr}",), - "STORE_DEREF": ("%{pattr}",), - "unpack": ("%C%,", (1, maxint, ", ")), - # This 
nonterminal we create on the fly in semantic routines - "unpack_w_parens": ("(%C%,)", (1, maxint, ", ")), # This nonterminal we create on the fly in semantic routines "attribute_w_parens": ("(%c).%[1]{pattr}", (0, "expr")), - # This nonterminal we create on the fly in semantic routines - "store_w_parens": ("(%c).%[1]{pattr}", (0, "expr")), - "unpack_list": ("[%C]", (1, maxint, ", ")), - "build_tuple2": ("%P", (0, -1, ", ", 100)), - "list_iter": ("%c", 0), - "list_for": (" for %c in %c%c", 2, 0, 3), - "list_if": (" if %p%c", (0, "expr", 27), 2), - "list_if_not": (" if not %p%c", (0, "expr", PRECEDENCE["unary_not"]), 2), - "lc_body": ("",), # ignore when recursing - "comp_iter": ("%c", 0), - "comp_if": (" if %c%c", 0, 2), - "comp_if_not": (" if not %p%c", (0, "expr", PRECEDENCE["unary_not"]), 2), - "comp_body": ("",), # ignore when recusing - "set_comp_body": ("%c", 0), - "gen_comp_body": ("%c", 0), - "dict_comp_body": ("%c:%c", 1, 0), - "assign": ("%|%c = %p\n", -1, (0, 200)), # The 2nd parameter should have a = suffix. # There is a rule with a 4th parameter "store" # which we don't use here. 
"aug_assign1": ("%|%c %c %c\n", 0, 2, 1), "aug_assign2": ("%|%c.%[2]{pattr} %c %c\n", 0, -3, -4), - "designList": ("%c = %c", 0, -1), - "and": ("%c and %c", 0, 2), - "ret_and": ("%c and %c", 0, 2), - "and2": ("%c", 3), - "or": ("%p or %p", (0, PRECEDENCE["or"]), (1, PRECEDENCE["or"])), - "ret_or": ("%c or %c", 0, 2), - "if_exp": ("%p if %c else %c", (2, "expr", 27), 0, 4), - "if_exp_lambda": ("%p if %c else %c", (2, "expr", 27), (0, "expr"), 4), - "if_exp_true": ("%p if 1 else %c", (0, "expr", 27), 2), - "if_exp_ret": ("%p if %p else %p", (2, 27), (0, 27), (-1, 27)), - "if_exp_not": ( - "%p if not %p else %p", - (2, 27), - (0, "expr", PRECEDENCE["unary_not"]), - (4, 27), + + # bin_op (formerly "binary_expr") is the Python AST BinOp + "bin_op": ("%c %c %c", 0, (-1, "binary_operator"), (1, "expr")), + + "break": ("%|break\n",), + "build_tuple2": ( + "%P", + (0, -1, ", ", NO_PARENTHESIS_EVER) ), - "if_exp_not_lambda": ("%p if not %c else %c", (2, "expr", 27), 0, 4), - "compare_single": ('%p %[-1]{pattr.replace("-", " ")} %p', (0, 19), (1, 19)), - "compare_chained": ("%p %p", (0, 29), (1, 30)), - "compare_chained1": ('%[3]{pattr.replace("-", " ")} %p %p', (0, 19), (-2, 19)), - "compare_chained2": ('%[1]{pattr.replace("-", " ")} %p', (0, 19)), - # "classdef": (), # handled by n_classdef() - # A custom rule in n_function def distinguishes whether to call this or - # function_def_async - "function_def": ("\n\n%|def %c\n", -2), # -2 to handle closures - "function_def_deco": ("\n\n%c", 0), - "mkfuncdeco": ("%|@%c\n%c", 0, 1), - # A custom rule in n_function def distinguishes whether to call this or - # function_def_async - "mkfuncdeco0": ("%|def %c\n", 0), - "classdefdeco": ("\n\n%c", 0), - "classdefdeco1": ("%|@%c\n%c", 0, 1), - "kwarg": ("%[0]{pattr}=%c", 1), # Change when Python 2 does LOAD_STR - "kwargs": ("%D", (0, maxint, ", ")), - "kwargs1": ("%D", (0, maxint, ", ")), - "assert_expr_or": ("%c or %c", 0, 2), - "assert_expr_and": ("%c and %c", 0, 2), - 
"print_items_stmt": ("%|print %c%c,\n", 0, 2), # Python 2 only - "print_items_nl_stmt": ("%|print %c%c\n", 0, 2), - "print_item": (", %c", 0), - "print_nl": ("%|print\n",), - "print_to": ("%|print >> %c, %c,\n", 0, 1), - "print_to_nl": ("%|print >> %c, %c\n", 0, 1), - "print_nl_to": ("%|print >> %c\n", 0), - "print_to_items": ("%C", (0, 2, ", ")), - # This is only generated by transform - # it is a string at the beginning of a function that is *not* a docstring - # 3.7 test_fstring.py tests for this kind of crap. - # For compatibility with older Python, we'll use "%" instead of - # a format string. - "string_at_beginning": ('%|"%%s" %% %c\n', 0), "call_stmt": ( "%|%p\n", # When a call statement contains only a named_expr (:=) # the named_expr should have parenthesis around it. (0, PRECEDENCE["named_expr"]-1)), - "break": ("%|break\n",), - "continue": ("%|continue\n",), - "raise_stmt0": ("%|raise\n",), - "raise_stmt1": ("%|raise %c\n", 0), - "raise_stmt3": ("%|raise %c, %c, %c\n", 0, 1, 2), - # "yield": ( "yield %c", 0), - # Note: we have a custom rule, which calls when we don't - # have "return None" - "return": ( "%|return %c\n", 0), + # "classdef": (), # handled by n_classdef() + # A custom rule in n_function def distinguishes whether to call this or + # function_def_async - "return_if_stmt": ("return %c\n", 0), - "ifstmt": ( - "%|if %c:\n%+%c%-", - 0, # "testexpr" or "testexpr_then" - 1, # "_ifstmts_jump" or "return_stmts" + "classdefdeco": ("\n\n%c", 0), + "classdefdeco1": ("%|@%c\n%c", 0, 1), + + "comp_body": ("",), # ignore when recusing + "comp_if": (" if %c%c", 0, 2), + "comp_if_not": (" if not %p%c", (0, "expr", PRECEDENCE["unary_not"]), 2), + "comp_iter": ("%c", 0), + + "compare_single": ('%p %[-1]{pattr.replace("-", " ")} %p', (0, 19), (1, 19)), + "compare_chained": ("%p %p", (0, 29), (1, 30)), + "compared_chained_middle": ('%[3]{pattr.replace("-", " ")} %p %p', (0, 19), (-2, 19)), + "compare_chained_right": ('%[1]{pattr.replace("-", " ")} %p', (0, 19)), 
+ + "continue": ("%|continue\n",), + + "delete_subscript": ( + "%|del %p[%c]\n", + (0, "expr", PRECEDENCE["subscript"]), + (1, "expr"), ), - "iflaststmt": ("%|if %c:\n%+%c%-", 0, 1), - "iflaststmtl": ("%|if %c:\n%+%c%-", 0, 1), - "testtrue": ("not %p", (0, PRECEDENCE["unary_not"])), - # Generally the args here are 0: (some sort of) "testexpr", - # 1: (some sort of) "cstmts_opt", - # 2 or 3: "else_suite" - # But unfortunately there are irregularities, For example, 2.6- uses "testexpr_then" - # and sometimes "cstmts" instead of "cstmts_opt" happens. - # Down the line we might isolate these into version-specific rules. - "ifelsestmt": ("%|if %c:\n%+%c%-%|else:\n%+%c%-", 0, 1, 3), - "ifelsestmtc": ("%|if %c:\n%+%c%-%|else:\n%+%c%-", 0, 1, 3), - "ifelsestmtl": ("%|if %c:\n%+%c%-%|else:\n%+%c%-", 0, 1, 3), - # These are created only via transformation - "ifelifstmt": ("%|if %c:\n%+%c%-%c", 0, 1, 3), # "testexpr" or "testexpr_then" + "designList": ("%c = %c", 0, -1), + "dict_comp_body": ("%c: %c", 1, 0), + "elifelifstmt": ("%|elif %c:\n%+%c%-%c", 0, 1, 3), - "elifstmt": ("%|elif %c:\n%+%c%-", 0, 1), "elifelsestmt": ("%|elif %c:\n%+%c%-%|else:\n%+%c%-", 0, 1, 3), - "ifelsestmtr": ("%|if %c:\n%+%c%-%|else:\n%+%c%-", 0, 1, 2), - "ifelsestmtr2": ("%|if %c:\n%+%c%-%|else:\n%+%c%-\n\n", 0, 1, 3), # has COME_FROM "elifelsestmtr": ("%|elif %c:\n%+%c%-%|else:\n%+%c%-\n\n", 0, 1, 2), "elifelsestmtr2": ( "%|elif %c:\n%+%c%-%|else:\n%+%c%-\n\n", @@ -391,20 +316,22 @@ 1, 3, ), # has COME_FROM - "whileTruestmt": ("%|while True:\n%+%c%-\n\n", 1), - "whilestmt": ("%|while %c:\n%+%c%-\n\n", 1, 2), - "while1stmt": ("%|while 1:\n%+%c%-\n\n", 1), - "while1elsestmt": ("%|while 1:\n%+%c%-%|else:\n%+%c%-\n\n", 1, -2), - "whileelsestmt": ("%|while %c:\n%+%c%-%|else:\n%+%c%-\n\n", 1, 2, -2), - "whileelsestmt2": ("%|while %c:\n%+%c%-%|else:\n%+%c%-\n\n", 1, 2, -3), - "whileelselaststmt": ("%|while %c:\n%+%c%-%|else:\n%+%c%-", 1, 2, -2), + "elifstmt": ("%|elif %c:\n%+%c%-", 0, 1), + + "except": 
("%|except:\n%+%c%-", 3), + "except_cond1": ("%|except %c:\n", 1), + "except_cond2": ("%|except %c as %c:\n", (1, "expr"), (5, "store")), + "except_suite": ("%+%c%-%C", 0, (1, maxint, "")), + + # In Python 3.6+, this is more complicated in the presence of "returns" + "except_suite_finalize": ("%+%c%-%C", 1, (3, maxint, "")), "expr_stmt": ( "%|%p\n", - # When a statment contains only a named_expr (:=) + # When a statement contains only a named_expr (:=) # the named_expr should have parenthesis around it. (0, "expr", PRECEDENCE["named_expr"] - 1) - ), + ), # Note: Python 3.8+ changes this "for": ("%|for %c in %c:\n%+%c%-\n\n", (3, "store"), (1, "expr"), (4, "for_block")), @@ -430,29 +357,212 @@ (4, "for_block"), -2, ), + + "function_def": ("\n\n%|def %c\n", -2), # -2 to handle closures + "function_def_deco": ("\n\n%c", 0), + + "gen_comp_body": ("%c", 0), + "get_iter": ("iter(%c)", (0, "expr"),), + + "if_exp": ("%p if %c else %c", (2, "expr", 27), 0, 4), + "if_exp_lambda": ("%p if %c else %c", (2, "expr", 27), (0, "expr"), 4), + "if_exp_true": ("%p if 1 else %c", (0, "expr", 27), 2), + "if_exp_ret": ("%p if %p else %p", (2, 27), (0, 27), (-1, 27)), + "if_exp_not": ( + "%p if not %p else %p", + (2, 27), + (0, "expr", PRECEDENCE["unary_not"]), + (4, 27), + ), + "if_exp_not_lambda": ("%p if not %c else %c", (2, "expr", 27), 0, 4), + + # Generally the args here are 0: (some sort of) "testexpr", + # 1: (some sort of) "cstmts_opt", + # 2 or 3: "else_suite" + # But unfortunately there are irregularities, For example, 2.6- uses "testexpr_then" + # and sometimes "cstmts" instead of "cstmts_opt" happens. + # Down the line we might isolate these into version-specific rules. + "ifelsestmt": ("%|if %c:\n%+%c%-%|else:\n%+%c%-", 0, 1, 3), + "ifelsestmtc": ("%|if %c:\n%+%c%-%|else:\n%+%c%-", 0, 1, 3), + "ifelsestmtl": ("%|if %c:\n%+%c%-%|else:\n%+%c%-", 0, 1, 3), + + # This is created only via transformation. 
+ "ifelifstmt": ("%|if %c:\n%+%c%-%c", 0, 1, 3), # "testexpr" or "testexpr_then" + + "ifelsestmtr": ("%|if %c:\n%+%c%-%|else:\n%+%c%-", 0, 1, 2), + "ifelsestmtr2": ("%|if %c:\n%+%c%-%|else:\n%+%c%-\n\n", 0, 1, 3), # has COME_FROM + "iflaststmt": ("%|if %c:\n%+%c%-", 0, 1), + + "iflaststmtl": ("%|if %c:\n%+%c%-", 0, 1), + + "ifstmt": ( + "%|if %c:\n%+%c%-", + 0, # "testexpr" or "testexpr_then" + 1, # "_ifstmts_jump" or "return_stmts" + ), + + "import": ("%|import %c\n", 2), + "importlist": ("%C", (0, maxint, ", ")), + + # Note: the below rule isn't really complete: + # n_import_from() smashes node[2].pattr + "import_from": ( + "%|from %[2]{pattr} import %c\n", + (3, "importlist") + ), + + "import_from_star": ( + "%|from %[2]{pattr} import *\n", + ), + + "kv": ("%c: %c", 3, 1), + "kv2": ("%c: %c", 1, 2), + + "kwarg": ("%[0]{pattr}=%c", 1), # Change when Python 2 does LOAD_STR + "kwargs": ("%D", (0, maxint, ", ")), + "kwargs1": ("%D", (0, maxint, ", ")), + + "lc_body": ("",), # ignore when recursing + "list_iter": ("%c", 0), + "list_for": (" for %c in %c%c", 2, 0, 3), + "list_if": (" if %p%c", (0, "expr", 27), 2), + "list_if_not": (" if not %p%c", (0, "expr", PRECEDENCE["unary_not"]), 2), + + "mkfuncdeco": ("%|@%c\n%c", (0, "expr"), 1), + # A custom rule in n_function def distinguishes whether to call this or + # function_def_async + "mkfuncdeco0": ("%|def %c\n", (0, ("mkfunc", "mkfunc_annotate"))), + + # In cases where we desire an explicit new line. + # After docstrings which are followed by a "def" is + # one situations where Python formatting desires two newlines, + # and this is added, as a transformation rule. 
+ "newline": ("\n"), + + "or": ("%p or %p", (0, PRECEDENCE["or"]), (1, PRECEDENCE["or"])), + + "pass": ("%|pass\n",), + + "print_item": (", %c", 0), + "print_items_nl_stmt": ("%|print %c%c\n", 0, 2), + "print_items_stmt": ("%|print %c%c,\n", 0, 2), # Python 2 only + "print_nl": ("%|print\n",), + "print_nl_to": ("%|print >> %c\n", 0), + "print_to": ("%|print >> %c, %c,\n", 0, 1), + "print_to_items": ("%C", (0, 2, ", ")), + "print_to_nl": ("%|print >> %c, %c\n", 0, 1), + + "raise_stmt0": ("%|raise\n",), + "raise_stmt1": ("%|raise %c\n", 0), + "raise_stmt3": ("%|raise %c, %c, %c\n", 0, 1, 2), + + "ret_and": ("%c and %c", 0, 2), + "ret_or": ("%c or %c", 0, 2), + + # Note: we have a custom rule, which calls when we don't + # have "return None" + "return": ( "%|return %c\n", 0), + + "set_comp_body": ("%c", 0), + + "set_iter": ( "%c", 0 ), + + "return_if_stmt": ("return %c\n", 0), + "slice0": ( + "%c[:]", + (0, "expr"), + ), + "slice1": ( + "%c[%p:]", + (0, "expr"), + (1, NO_PARENTHESIS_EVER) + ), + + "slice2": ( "%c[:%p]", + (0, "expr"), + (1, NO_PARENTHESIS_EVER) + ), + + "slice3": ( + "%c[%p:%p]", + (0, "expr"), + (1, NO_PARENTHESIS_EVER), + (2, NO_PARENTHESIS_EVER) + ), + + "store_subscript": ( + "%p[%c]", + (0, "expr", PRECEDENCE["subscript"]), (1, "expr") + ), + + # This nonterminal we create on the fly in semantic routines + "store_w_parens": ( + "(%c).%[1]{pattr}", + (0, "expr") + ), + + # This is only generated by transform + # it is a string at the beginning of a function that is *not* a docstring + # 3.7 test_fstring.py tests for this kind of crap. + # For compatibility with older Python, we'll use "%" instead of + # a format string. 
+ "string_at_beginning": ('%|"%%s" %% %c\n', 0), + + "subscript": ( + "%p[%p]", + (0, "expr", PRECEDENCE["subscript"]), + (1, "expr", NO_PARENTHESIS_EVER) + ), + + "subscript2": ( + "%p[%p]", + (0, "expr", PRECEDENCE["subscript"]), + (1, "expr", NO_PARENTHESIS_EVER) + ), + + "testtrue": ("not %p", (0, PRECEDENCE["unary_not"])), + + # Note: this is generated generated by grammar rules but in this phase. + "tf_try_except": ("%c%-%c%+", 1, 3), + "tf_tryelsestmt": ("%c%-%c%|else:\n%+%c", 1, 3, 4), + "try_except": ("%|try:\n%+%c%-%c\n\n", 1, 3), "tryelsestmt": ("%|try:\n%+%c%-%c%|else:\n%+%c%-\n\n", 1, 3, 4), "tryelsestmtc": ("%|try:\n%+%c%-%c%|else:\n%+%c%-", 1, 3, 4), "tryelsestmtl": ("%|try:\n%+%c%-%c%|else:\n%+%c%-", 1, 3, 4), - # Note: this is generated generated by grammar rules but in this phase. - "tf_try_except": ("%c%-%c%+", 1, 3), - "tf_tryelsestmt": ("%c%-%c%|else:\n%+%c", 1, 3, 4), "tryfinallystmt": ("%|try:\n%+%c%-%|finally:\n%+%c%-\n\n", 1, 5), - "except": ("%|except:\n%+%c%-", 3), - "except_cond1": ("%|except %c:\n", 1), - "except_cond2": ("%|except %c as %c:\n", (1, "expr"), (5, "store")), - "except_suite": ("%+%c%-%C", 0, (1, maxint, "")), - # In Python 3.6+, this is more complicated in the presence of "returns" - "except_suite_finalize": ("%+%c%-%C", 1, (3, maxint, "")), - "pass": ("%|pass\n",), - "STORE_FAST": ("%{pattr}",), - "kv": ("%c: %c", 3, 1), - "kv2": ("%c: %c", 1, 2), - "import": ("%|import %c\n", 2), - "importlist": ("%C", (0, maxint, ", ")), - "import_from": ("%|from %[2]{pattr} import %c\n", (3, "importlist")), - "import_from_star": ("%|from %[2]{pattr} import *\n",), + + # unary_op (formerly "unary_expr") is the Python AST UnaryOp + "unary_op": ("%c%c", (1, "unary_operator"), (0, "expr")), + "unary_not": ("not %c", (0, "expr")), + "unary_convert": ("`%c`", (0, "expr"),), + + "unpack": ("%C%,", (1, maxint, ", ")), + "unpack_list": ("[%C]", (1, maxint, ", ")), + # This nonterminal we create on the fly in semantic routines + 
"unpack_w_parens": ("(%C%,)", (1, maxint, ", ")), + + "whileTruestmt": ("%|while True:\n%+%c%-\n\n", 1), + "whilestmt": ("%|while %c:\n%+%c%-\n\n", 1, 2), + "while1stmt": ("%|while 1:\n%+%c%-\n\n", 1), + "while1elsestmt": ("%|while 1:\n%+%c%-%|else:\n%+%c%-\n\n", 1, -2), + "whileelsestmt": ("%|while %c:\n%+%c%-%|else:\n%+%c%-\n\n", 1, 2, -2), + "whileelsestmt2": ("%|while %c:\n%+%c%-%|else:\n%+%c%-\n\n", 1, 2, -3), + "whileelselaststmt": ("%|while %c:\n%+%c%-%|else:\n%+%c%-", 1, 2, -2), + + # If there are situations where we need "with ... as ()" + # We may need to customize this in n_with_as + "with_as": ( + "%|with %c as %c:\n%+%c%-", + (0, "expr"), + (2, "store"), + (3, ("suite_stmts_opt", "suite_stmts")), + ), + + # "yield": ( "yield %c", 0), + } +# fmt: on MAP_DIRECT = (TABLE_DIRECT,) @@ -465,7 +575,7 @@ "store": MAP_R, } -ASSIGN_TUPLE_PARAM = lambda param_name: SyntaxTree( +ASSIGN_TUPLE_PARAM = lambda param_name: SyntaxTree( # noqa "expr", [Token("LOAD_FAST", pattr=param_name)] ) diff --git a/uncompyle6/semantics/customize.py b/uncompyle6/semantics/customize.py index e134a72e2..cb4758277 100644 --- a/uncompyle6/semantics/customize.py +++ b/uncompyle6/semantics/customize.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2019, 2021 by Rocky Bernstein +# Copyright (c) 2018-2019, 2021-2022 2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,9 +17,15 @@ """ from uncompyle6.parsers.treenode import SyntaxTree -from uncompyle6.semantics.consts import INDENT_PER_LEVEL, PRECEDENCE, TABLE_R, TABLE_DIRECT -from uncompyle6.semantics.helper import flatten_list from uncompyle6.scanners.tok import Token +from uncompyle6.semantics.consts import ( + INDENT_PER_LEVEL, + NO_PARENTHESIS_EVER, + PRECEDENCE, + TABLE_DIRECT, + TABLE_R, +) +from uncompyle6.semantics.helper import flatten_list def customize_for_version(self, is_pypy, version): @@ -27,38 +33,51 @@ def 
customize_for_version(self, is_pypy, version): ######################## # PyPy changes ####################### - TABLE_DIRECT.update({ - "assert": ("%|assert %c\n", 0), - "assert_pypy": ( '%|assert %c\n' , (1, 'assert_expr') ), + # fmt: off + self.TABLE_DIRECT.update({ + + "assert": ("%|assert %c\n", 0), + # This can happen as a result of an if transformation + "assert2": ("%|assert %c, %c\n", 0, 3), + "assert_pypy": ( "%|assert %c\n" , (1, "assert_expr") ), # This is as a result of an if transformation - 'assert0_pypy': ( '%|assert %c\n' , 0), - - 'assert_not_pypy': ( '%|assert not %c\n' , (1, 'assert_exp') ), - 'assert2_not_pypy': ( '%|assert not %c, %c\n' , (1, 'assert_exp'), - (4, 'expr') ), - 'assert2_pypy': ( '%|assert %c, %c\n' , (1, 'assert_expr'), - (4, 'expr') ), - 'try_except_pypy': ( '%|try:\n%+%c%-%c\n\n', 1, 2 ), - 'tryfinallystmt_pypy': ( '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n', 1, 3 ), - 'assign3_pypy': ( '%|%c, %c, %c = %c, %c, %c\n', 5, 4, 3, 0, 1, 2 ), - 'assign2_pypy': ( '%|%c, %c = %c, %c\n', 3, 2, 0, 1), + 'assert0_pypy': ( "%|assert %c\n" , 0), + + 'assert_not_pypy': ( "%|assert not %c\n" , (1, "assert_exp") ), + "assert2_not_pypy": ( + "%|assert not %c, %c\n", + (1, "assert_exp"), + (4, "expr"), + ), + + "try_except_pypy": ( "%|try:\n%+%c%-%c\n\n", 1, 2 ), + "tryfinallystmt_pypy": ( "%|try:\n%+%c%-%|finally:\n%+%c%-\n\n", 1, 3 ), + "assign3_pypy": ( "%|%c, %c, %c = %c, %c, %c\n", 5, 4, 3, 0, 1, 2 ), + "assign2_pypy": ( "%|%c, %c = %c, %c\n", 3, 2, 0, 1), }) + # fmt: on if version[:2] >= (3, 7): def n_call_kw_pypy37(node): - self.template_engine(("%p(", (0, 100)), node) + self.template_engine(("%p(", (0, NO_PARENTHESIS_EVER)), node) assert node[-1] == "CALL_METHOD_KW" arg_count = node[-1].attr kw_names = node[-2] assert kw_names == "pypy_kw_keys" - flat_elems = flatten_list(node[1:-2]) - n = len(flat_elems) - assert n == arg_count kwargs_names = kw_names[0].attr kwarg_count = len(kwargs_names) pos_argc = arg_count - kwarg_count + + flat_elems 
= flatten_list(node[1:-2]) + n = len(flat_elems) + assert n == arg_count, "n: %s, arg_count: %s\n%s" % ( + n, + arg_count, + node, + ) + sep = "" for i in range(pos_argc): @@ -95,39 +114,42 @@ def n_call_kw_pypy37(node): ######################## # Without PyPy ####################### - TABLE_DIRECT.update({ - # "assert" and "assert_expr" are added via transform rules. - "assert": ("%|assert %c\n", 0), - "assert2": ("%|assert %c, %c\n", 0, 3), - - # Created only via transformation - "assertnot": ("%|assert not %p\n", (0, PRECEDENCE['unary_not'])), - "assert2not": ( "%|assert not %p, %c\n" , - (0, PRECEDENCE['unary_not']), 3 ), - - "assign2": ("%|%c, %c = %c, %c\n", 3, 4, 0, 1), - "assign3": ("%|%c, %c, %c = %c, %c, %c\n", 5, 6, 7, 0, 1, 2), - "try_except": ("%|try:\n%+%c%-%c\n\n", 1, 3), - }) + self.TABLE_DIRECT.update( + { + # "assert" and "assert_expr" are added via transform rules. + "assert": ("%|assert %c\n", 0), + "assert2": ("%|assert %c, %c\n", 0, 3), + # Created only via transformation + "assertnot": ("%|assert not %p\n", (0, PRECEDENCE["unary_not"])), + "assert2not": ( + "%|assert not %p, %c\n", + (0, PRECEDENCE["unary_not"]), + 3, + ), + "assign2": ("%|%c, %c = %c, %c\n", 3, 4, 0, 1), + "assign3": ("%|%c, %c, %c = %c, %c, %c\n", 5, 6, 7, 0, 1, 2), + "try_except": ("%|try:\n%+%c%-%c\n\n", 1, 3), + } + ) if version >= (3, 0): if version >= (3, 2): - TABLE_DIRECT.update( + self.TABLE_DIRECT.update( {"del_deref_stmt": ("%|del %c\n", 0), "DELETE_DEREF": ("%{pattr}", 0)} ) from uncompyle6.semantics.customize3 import customize_for_version3 customize_for_version3(self, version) else: # < 3.0 - TABLE_DIRECT.update( + self.TABLE_DIRECT.update( {"except_cond3": ("%|except %c, %c:\n", (1, "expr"), (-2, "store"))} ) if version <= (2, 6): - TABLE_DIRECT["testtrue_then"] = TABLE_DIRECT["testtrue"] + self.TABLE_DIRECT["testtrue_then"] = self.TABLE_DIRECT["testtrue"] if (2, 4) <= version <= (2, 6): - TABLE_DIRECT.update({"comp_for": (" for %c in %c", 3, 1)}) + 
self.TABLE_DIRECT.update({"comp_for": (" for %c in %c", 3, 1)}) else: - TABLE_DIRECT.update({"comp_for": (" for %c in %c%c", 2, 0, 3)}) + self.TABLE_DIRECT.update({"comp_for": (" for %c in %c%c", 2, 0, 3)}) if version >= (2, 5): from uncompyle6.semantics.customize25 import customize_for_version25 @@ -175,7 +197,7 @@ def n_call_kw_pypy37(node): ) ], ) - TABLE_DIRECT.update( + self.TABLE_DIRECT.update( { "importmultiple": ("%|import %c%c\n", 2, 3), "import_cont": (", %c", 2), @@ -187,16 +209,47 @@ def n_call_kw_pypy37(node): } ) if version == (2, 4): + def n_iftrue_stmt24(node): self.template_engine(("%c", 0), node) self.default(node) self.prune() self.n_iftrue_stmt24 = n_iftrue_stmt24 - else: # version <= 2.3: - TABLE_DIRECT.update({"if1_stmt": ("%|if 1\n%+%c%-", 5)}) + elif version < (1, 4): + from uncompyle6.semantics.customize14 import customize_for_version14 + + customize_for_version14(self, version) + + def n_call(node): + expr = node[0] + assert expr == "expr" + params = node[1] + if params == "tuple": + self.template_engine(("%p(", (0, NO_PARENTHESIS_EVER)), expr) + sep = "" + for param in params[:-1]: + self.write(sep) + self.preorder(param) + sep = ", " + self.write(")") + else: + self.template_engine( + ( + "%p(%P)", + (0, "expr", 100), + (1, -1, ", ", NO_PARENTHESIS_EVER), + ), + node, + ) + self.prune() + + self.n_call = n_call + + else: # 1.0 <= version <= 2.3: + self.TABLE_DIRECT.update({"if1_stmt": ("%|if 1\n%+%c%-", 5)}) if version <= (2, 1): - TABLE_DIRECT.update( + self.TABLE_DIRECT.update( { "importmultiple": ("%c", 2), # FIXME: not quite right. 
We have indiividual imports @@ -210,7 +263,7 @@ def n_iftrue_stmt24(node): # < 3.0 continues - TABLE_R.update( + self.TABLE_R.update( { "STORE_SLICE+0": ("%c[:]", 0), "STORE_SLICE+1": ("%c[%p:]", 0, (1, -1)), @@ -222,7 +275,7 @@ def n_iftrue_stmt24(node): "DELETE_SLICE+3": ("%|del %c[%c:%c]\n", 0, 1, 2), } ) - TABLE_DIRECT.update({"raise_stmt2": ("%|raise %c, %c\n", 0, 1)}) + self.TABLE_DIRECT.update({"raise_stmt2": ("%|raise %c, %c\n", 0, 1)}) # exec as a built-in statement is only in Python 2.x def n_exec_stmt(node): diff --git a/uncompyle6/semantics/customize14.py b/uncompyle6/semantics/customize14.py new file mode 100644 index 000000000..566a6ec1f --- /dev/null +++ b/uncompyle6/semantics/customize14.py @@ -0,0 +1,27 @@ +# Copyright (c) 2022, 2024 by Rocky Bernstein +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +"""Isolate Python 1.4- version-specific semantic actions here. 
+""" + + +####################### +# Python 1.4- Changes # +####################### +def customize_for_version14(self, version: tuple): + self.TABLE_DIRECT.update( + { + "print_expr_stmt": (("%|print %c\n", 0)), + } + ) diff --git a/uncompyle6/semantics/customize25.py b/uncompyle6/semantics/customize25.py index 911997a98..d635e9f10 100644 --- a/uncompyle6/semantics/customize25.py +++ b/uncompyle6/semantics/customize25.py @@ -17,23 +17,25 @@ from uncompyle6.semantics.consts import TABLE_DIRECT + ####################### # Python 2.5+ Changes # ####################### def customize_for_version25(self, version): - ######################## # Import style for 2.5+ ######################## - TABLE_DIRECT.update({ - 'importmultiple': ( '%|import %c%c\n', 2, 3 ), - 'import_cont' : ( ', %c', 2 ), - # With/as is allowed as "from future" thing in 2.5 - # Note: It is safe to put the variables after "as" in parenthesis, - # and sometimes it is needed. - 'with': ( '%|with %c:\n%+%c%-', 0, 3), - 'withasstmt': ( '%|with %c as (%c):\n%+%c%-', 0, 2, 3), - }) + self.TABLE_DIRECT.update( + { + "importmultiple": ("%|import %c%c\n", 2, 3), + "import_cont": (", %c", 2), + # With/as is allowed as "from future" thing in 2.5 + # Note: It is safe to put the variables after "as" in parenthesis, + # and sometimes it is needed. + "with": ("%|with %c:\n%+%c%-", 0, 3), + "and_then": ("%c and %c", (0, "expr"), (4, "expr")), + } + ) # In 2.5+ "except" handlers and the "finally" can appear in one # "try" statement. So the below has the effect of combining the @@ -41,10 +43,18 @@ def customize_for_version25(self, version): # FIXME: something doesn't smell right, since the semantics # are different. See test_fileio.py for an example that shows this. 
def tryfinallystmt(node): - if len(node[1][0]) == 1 and node[1][0][0] == 'stmt': - if node[1][0][0][0] == 'try_except': - node[1][0][0][0].kind = 'tf_try_except' - if node[1][0][0][0] == 'tryelsestmt': - node[1][0][0][0].kind = 'tf_tryelsestmt' + if len(node[1][0]) == 1 and node[1][0][0] == "stmt": + if node[1][0][0][0] == "try_except": + node[1][0][0][0].kind = "tf_try_except" + if node[1][0][0][0] == "tryelsestmt": + node[1][0][0][0].kind = "tf_tryelsestmt" self.default(node) + self.n_tryfinallystmt = tryfinallystmt + + def n_import_from(node): + if node[0].pattr > 0: + node[2].pattr = ("." * node[0].pattr) + node[2].pattr + self.default(node) + + self.n_import_from = n_import_from diff --git a/uncompyle6/semantics/customize26_27.py b/uncompyle6/semantics/customize26_27.py index ee50bb217..d70e43f22 100644 --- a/uncompyle6/semantics/customize26_27.py +++ b/uncompyle6/semantics/customize26_27.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 2021 by Rocky Bernstein +# Copyright (c) 2019 2021, 2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,8 +17,8 @@ from uncompyle6.semantics.consts import TABLE_DIRECT -def customize_for_version26_27(self, version): +def customize_for_version26_27(self, version: tuple): ######################################## # Python 2.6+ # except as @@ -29,16 +29,20 @@ def customize_for_version26_27(self, version): # matches how we parse this in bytecode ######################################## if version > (2, 6): - TABLE_DIRECT.update({ - 'except_cond2': ( '%|except %c as %c:\n', 1, 5 ), - # When a generator is a single parameter of a function, - # it doesn't need the surrounding parenethesis. 
- 'call_generator': ('%c%P', 0, (1, -1, ', ', 100)), - }) + self.TABLE_DIRECT.update( + { + "except_cond2": ("%|except %c as %c:\n", 1, 5), + # When a generator is a single parameter of a function, + # it doesn't need the surrounding parenethesis. + "call_generator": ("%c%P", 0, (1, -1, ", ", 100)), + } + ) else: - TABLE_DIRECT.update({ - 'testtrue_then': ( 'not %p', (0, 22) ), - }) + self.TABLE_DIRECT.update( + { + "testtrue_then": ("not %p", (0, 22)), + } + ) # FIXME: this should be a transformation def n_call(node): @@ -47,16 +51,24 @@ def n_call(node): for i in mapping[1:]: key = key[i] pass - if key.kind == 'CALL_FUNCTION_1': + if key.kind == "CALL_FUNCTION_1": # A function with one argument. If this is a generator, # no parenthesis is needed. args_node = node[-2] - if args_node == 'expr': + if args_node == "expr": n = args_node[0] - if n == 'generator_exp': - node.kind = 'call_generator' + if n == "generator_exp": + node.kind = "call_generator" pass pass self.default(node) + self.n_call = n_call + + def n_import_from(node): + if node[0].pattr > 0: + node[2].pattr = ("." * node[0].pattr) + node[2].pattr + self.default(node) + + self.n_import_from = n_import_from diff --git a/uncompyle6/semantics/customize3.py b/uncompyle6/semantics/customize3.py index bb8abe275..45564d25e 100644 --- a/uncompyle6/semantics/customize3.py +++ b/uncompyle6/semantics/customize3.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2021 by Rocky Bernstein +# Copyright (c) 2018-2021, 2023-2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -13,27 +13,24 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -"""Isolate Python 3 version-specific semantic actions here. +""" +Isolate Python 3 version-specific semantic actions here. 
""" -from uncompyle6.semantics.consts import TABLE_DIRECT - -from xdis import co_flags_is_async, iscode -from uncompyle6.scanner import Code -from uncompyle6.semantics.helper import ( - find_code_node, - gen_function_parens_adjust, -) +from xdis import iscode -from uncompyle6.semantics.make_function3 import make_function3_annotate +from uncompyle6.semantics.consts import TABLE_DIRECT from uncompyle6.semantics.customize35 import customize_for_version35 from uncompyle6.semantics.customize36 import customize_for_version36 from uncompyle6.semantics.customize37 import customize_for_version37 from uncompyle6.semantics.customize38 import customize_for_version38 +from uncompyle6.semantics.helper import find_code_node, gen_function_parens_adjust +from uncompyle6.semantics.make_function3 import make_function3_annotate +from uncompyle6.util import get_code_name -def customize_for_version3(self, version): - TABLE_DIRECT.update( +def customize_for_version3(self, version: tuple): + self.TABLE_DIRECT.update( { "comp_for": (" for %c in %c", (2, "store"), (0, "expr")), "if_exp_not": ( @@ -51,10 +48,9 @@ def customize_for_version3(self, version): "import_cont": (", %c", 2), "kwarg": ("%[0]{attr}=%c", 1), "raise_stmt2": ("%|raise %c from %c\n", 0, 1), - "tf_tryelsestmtl3": ( '%c%-%c%|else:\n%+%c', 1, 3, 5 ), + "tf_tryelsestmtl3": ("%c%-%c%|else:\n%+%c", 1, 3, 5), "store_locals": ("%|# inspect.currentframe().f_locals = __locals__\n",), "with": ("%|with %c:\n%+%c%-", 0, 3), - "withasstmt": ("%|with %c as (%c):\n%+%c%-", 0, 2, 3), } ) @@ -67,152 +63,22 @@ def customize_for_version3(self, version): # are different. See test_fileio.py for an example that shows this. 
def tryfinallystmt(node): suite_stmts = node[1][0] - if len(suite_stmts) == 1 and suite_stmts[0] == 'stmt': + if len(suite_stmts) == 1 and suite_stmts[0] == "stmt": stmt = suite_stmts[0] try_something = stmt[0] if try_something == "try_except": try_something.kind = "tf_try_except" if try_something.kind.startswith("tryelsestmt"): if try_something == "tryelsestmtl3": - try_something.kind = 'tf_tryelsestmtl3' + try_something.kind = "tf_tryelsestmtl3" else: - try_something.kind = 'tf_tryelsestmt' + try_something.kind = "tf_tryelsestmt" self.default(node) - self.n_tryfinallystmt = tryfinallystmt - def listcomp_closure3(node): - """List comprehensions in Python 3 when handled as a closure. - See if we can combine code. - """ - - # FIXME: DRY with comprehension_walk_newer - p = self.prec - self.prec = 27 - - code_obj = node[1].attr - assert iscode(code_obj), node[1] - code = Code(code_obj, self.scanner, self.currentclass, self.debug_opts["asm"]) - - ast = self.build_ast(code._tokens, code._customize, code) - self.customize(code._customize) - - # skip over: sstmt, stmt, return, return_expr - # and other singleton derivations - while len(ast) == 1 or ( - ast in ("sstmt", "return") and ast[-1] in ("RETURN_LAST", "RETURN_VALUE") - ): - self.prec = 100 - ast = ast[0] - - n = ast[1] - - # Pick out important parts of the comprehension: - # * the variables we iterate over: "stores" - # * the results we accumulate: "n" - - # collections is the name of the expression(s) we are iterating over - collections = [node[-3]] - list_ifs = [] - - if self.version[:2] == (3, 0) and n != "list_iter": - # FIXME 3.0 is a snowflake here. We need - # special code for this. Not sure if this is totally - # correct. - stores = [ast[3]] - assert ast[4] == "comp_iter" - n = ast[4] - # Find the list comprehension body. It is the inner-most - # node that is not comp_.. . 
- while n == "comp_iter": - if n[0] == "comp_for": - n = n[0] - stores.append(n[2]) - n = n[3] - elif n[0] in ("comp_if", "comp_if_not"): - n = n[0] - # FIXME: just a guess - if n[0].kind == "expr": - list_ifs.append(n) - else: - list_ifs.append([1]) - n = n[2] - pass - else: - break - pass - - # Skip over n[0] which is something like: _[1] - self.preorder(n[1]) - - else: - assert n == "list_iter" - stores = [] - # Find the list comprehension body. It is the inner-most - # node that is not list_.. . - while n == "list_iter": - - # recurse one step - n = n[0] - - # FIXME: adjust for set comprehension - if n == "list_for": - stores.append(n[2]) - n = n[3] - if n[0] == "list_for": - # Dog-paddle down largely singleton reductions - # to find the collection (expr) - c = n[0][0] - if c == "expr": - c = c[0] - # FIXME: grammar is wonky here? Is this really an attribute? - if c == "attribute": - c = c[0] - collections.append(c) - pass - elif n in ("list_if", "list_if_not", "list_if_or_not"): - if n[0].kind == "expr": - list_ifs.append(n) - else: - list_ifs.append([1]) - n = n[-2] if n[-1] == "come_from_opt" else n[-1] - pass - elif n == "list_if37": - list_ifs.append(n) - n = n[-1] - pass - elif n == "list_afor": - collections.append(n[0][0]) - n = n[1] - stores.append(n[1][0]) - n = n[2] if n[2].kind == "list_iter" else n[3] - pass - - assert n == "lc_body", ast - - self.preorder(n[0]) - - # FIXME: add indentation around "for"'s and "in"'s - n_colls = len(collections) - for i, store in enumerate(stores): - if i >= n_colls: - break - if collections[i] == "LOAD_DEREF" and co_flags_is_async(code_obj.co_flags): - self.write(" async") - pass - self.write(" for ") - self.preorder(store) - self.write(" in ") - self.preorder(collections[i]) - if i < len(list_ifs): - self.preorder(list_ifs[i]) - pass - pass - self.prec = p - self.listcomp_closure3 = listcomp_closure3 + self.n_tryfinallystmt = tryfinallystmt def n_classdef3(node): - """Handle "classdef" nonterminal for 3.0 >= 
version 3.0 < 3.6 - """ + """Handle "classdef" nonterminal for 3.0 >= version 3.0 < 3.6""" assert (3, 0) <= self.version < (3, 6) @@ -272,14 +138,10 @@ def n_classdef3(node): # Python 3.2 works like this subclass_code = find_code_node(load_closure, -2).attr else: - raise "Internal Error n_classdef: cannot find class body" - if hasattr(build_class[3], "__len__"): - if not subclass_info: - subclass_info = build_class[3] - elif hasattr(build_class[2], "__len__"): - subclass_info = build_class[2] - else: - raise "Internal Error n_classdef: cannot superclass name" + raise RuntimeError("Internal Error n_classdef: cannot find class body") + + subclass_info = build_class + elif not subclass_info: if mkfunc[0] in ("no_kwargs", "kwargs"): subclass_code = mkfunc[1].attr @@ -321,18 +183,25 @@ def n_classdef3(node): # the iteration variable. These rules we can ignore # since we pick up the iteration variable some other way and # we definitely don't include in the source _[dd]. - TABLE_DIRECT.update({ - "ifstmt30": ( "%|if %c:\n%+%c%-", - (0, "testfalse_then"), - (1, "_ifstmts_jump30") ), - "ifnotstmt30": ( "%|if not %c:\n%+%c%-", - (0, "testtrue_then"), - (1, "_ifstmts_jump30") ), - "try_except30": ( "%|try:\n%+%c%-%c\n\n", - (1, "suite_stmts_opt"), - (4, "except_handler") ), - - }) + self.TABLE_DIRECT.update( + { + "ifstmt30": ( + "%|if %c:\n%+%c%-", + (0, "testfalse_then"), + (1, "_ifstmts_jump30"), + ), + "ifnotstmt30": ( + "%|if not %c:\n%+%c%-", + (0, "testtrue_then"), + (1, "_ifstmts_jump30"), + ), + "try_except30": ( + "%|try:\n%+%c%-%c\n\n", + (1, "suite_stmts_opt"), + (4, "except_handler"), + ), + } + ) def n_comp_iter(node): if node[0] == "expr": @@ -352,7 +221,7 @@ def n_yield_from(node): assert node[0] == "expr" if node[0][0] == "get_iter": # Skip over yield_from.expr.get_iter which adds an - # extra iter(). Maybe we can do in tranformation phase instead? + # extra iter(). Maybe we can do in transformation phase instead? 
template = ("yield from %c", (0, "expr")) self.template_engine(template, node[0][0]) else: @@ -365,7 +234,6 @@ def n_yield_from(node): if (3, 2) <= version <= (3, 4): def n_call(node): - mapping = self._get_mapping(node) key = node for i in mapping[1:]: @@ -419,24 +287,23 @@ def n_call(node): self.n_call = n_call def n_mkfunc_annotate(node): - # Handling EXTENDED_ARG before MAKE_FUNCTION ... i = -1 if node[-2] == "EXTENDED_ARG" else 0 if self.version < (3, 3): - code = node[-2 + i] + code_node = node[-2 + i] elif self.version >= (3, 3) or node[-2] == "kwargs": # LOAD_CONST code object .. # LOAD_CONST 'x0' if >= 3.3 # EXTENDED_ARG # MAKE_FUNCTION .. - code = node[-3 + i] + code_node = node[-3 + i] elif node[-3] == "expr": - code = node[-3][0] + code_node = node[-3][0] else: # LOAD_CONST code object .. # MAKE_FUNCTION .. - code = node[-3] + code_node = node[-3] self.indent_more() for annotate_last in range(len(node) - 1, -1, -1): @@ -446,13 +313,17 @@ def n_mkfunc_annotate(node): # FIXME: the real situation is that when derived from # function_def_annotate we the name has been filled in. # But when derived from funcdefdeco it hasn't Would like a better - # way to distinquish. + # way to distinguish. 
if self.f.getvalue()[-4:] == "def ": - self.write(code.attr.co_name) + self.write(get_code_name(code_node.attr)) # FIXME: handle and pass full annotate args make_function3_annotate( - self, node, is_lambda=False, code_node=code, annotate_last=annotate_last + self, + node, + is_lambda=False, + code_node=code_node, + annotate_last=annotate_last, ) if len(self.param_stack) > 1: @@ -464,12 +335,12 @@ def n_mkfunc_annotate(node): self.n_mkfunc_annotate = n_mkfunc_annotate - TABLE_DIRECT.update( + self.TABLE_DIRECT.update( { "tryelsestmtl3": ( "%|try:\n%+%c%-%c%|else:\n%+%c%-", (1, "suite_stmts_opt"), - 3, # "except_handler_else" or "except_handler" + 3, # "except_handler_else" or "except_handler" (5, "else_suitel"), ), "LOAD_CLASSDEREF": ("%{pattr}",), @@ -479,7 +350,7 @@ def n_mkfunc_annotate(node): ####################### # Python 3.4+ Changes # ####################### - TABLE_DIRECT.update( + self.TABLE_DIRECT.update( { "LOAD_CLASSDEREF": ("%{pattr}",), "yield_from": ("yield from %c", (0, "expr")), diff --git a/uncompyle6/semantics/customize35.py b/uncompyle6/semantics/customize35.py index f95f38224..df3714154 100644 --- a/uncompyle6/semantics/customize35.py +++ b/uncompyle6/semantics/customize35.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020 by Rocky Bernstein +# Copyright (c) 2019-2020, 2022, 2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -16,35 +16,42 @@ """ from xdis import co_flags_is_async, iscode -from uncompyle6.semantics.consts import ( - INDENT_PER_LEVEL, - PRECEDENCE, - TABLE_DIRECT, -) +from uncompyle6.semantics.consts import INDENT_PER_LEVEL, PRECEDENCE, TABLE_DIRECT from uncompyle6.semantics.helper import flatten_list, gen_function_parens_adjust + ####################### # Python 3.5+ Changes # ####################### -def customize_for_version35(self, version): - TABLE_DIRECT.update( +def customize_for_version35(self, version: 
tuple): + # fmt: off + self.TABLE_DIRECT.update( { # nested await expressions like: # return await (await bar()) # need parenthesis. - "await_expr": ("await %p", (0, PRECEDENCE["await_expr"]-1)), + "await_expr": ("await %p", (0, PRECEDENCE["await_expr"] - 1)), "await_stmt": ("%|%c\n", 0), - "async_for_stmt": ("%|async for %c in %c:\n%+%|%c%-\n\n", 9, 1, 25), + "async_for_stmt": ( + "%|async for %c in %c:\n%+%|%c%-\n\n", + (9, "store"), + (1, "expr"), + (25, ("for_block", "pass")), + ), "async_forelse_stmt": ( "%|async for %c in %c:\n%+%c%-%|else:\n%+%c%-\n\n", - 9, - 1, - 25, - (27, "else_suite"), + (9, "store"), + (1, "expr"), + (25, "for_block"), + (-2, "else_suite"), + ), + "async_with_stmt": ( + "%|async with %c:\n%+%c%-", + (0, "expr"), + 3 ), - "async_with_stmt": ("%|async with %c:\n%+%c%-", (0, "expr"), 3), "async_with_as_stmt": ( "%|async with %c as %c:\n%+%c%-", (0, "expr"), @@ -56,6 +63,8 @@ def customize_for_version35(self, version): } ) + # fmt: on + def async_call(node): self.f.write("async ") node.kind == "call" @@ -86,6 +95,8 @@ def n_build_list_unpack(node): if lastnodetype.startswith("BUILD_LIST"): self.write("[") endchar = "]" + else: + endchar = "" flat_elems = flatten_list(node) @@ -182,7 +193,11 @@ def n_call(node): self.template_engine(template, args_node) else: if len(node) - nargs > 3: - template = ("*%c, %P)", nargs + 1, (nargs + kwargs + 1, -1, ", ", 100)) + template = ( + "*%c, %P)", + nargs + 1, + (nargs + kwargs + 1, -1, ", ", 100), + ) else: template = ("*%c)", nargs + 1) self.template_engine(template, node) diff --git a/uncompyle6/semantics/customize36.py b/uncompyle6/semantics/customize36.py index 9df98ad19..bb5efd845 100644 --- a/uncompyle6/semantics/customize36.py +++ b/uncompyle6/semantics/customize36.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021 by Rocky Bernstein +# Copyright (c) 2019-2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public 
License as published by @@ -15,16 +15,18 @@ """Isolate Python 3.6 version-specific semantic actions here. """ -from xdis import iscode from spark_parser.ast import GenericASTTraversalPruningException +from xdis import iscode + from uncompyle6.scanners.tok import Token -from uncompyle6.semantics.helper import flatten_list, escape_string, strip_quotes from uncompyle6.semantics.consts import ( INDENT_PER_LEVEL, PRECEDENCE, TABLE_DIRECT, TABLE_R, ) +from uncompyle6.semantics.helper import escape_string, flatten_list, strip_quotes +from uncompyle6.util import get_code_name def escape_format(s): @@ -36,8 +38,7 @@ def escape_format(s): ####################### -def customize_for_version36(self, version): - +def customize_for_version36(self, version: tuple): # fmt: off PRECEDENCE["call_kw"] = 0 PRECEDENCE["call_kw36"] = 1 @@ -49,7 +50,7 @@ def customize_for_version36(self, version): PRECEDENCE["dict_pack"] = 0 # **{ ... } PRECEDENCE["formatted_value1"] = 100 - TABLE_DIRECT.update( + self.TABLE_DIRECT.update( { "ann_assign_init_value": ( "%|%c = %p\n", @@ -61,7 +62,15 @@ def customize_for_version36(self, version): "%|async for %c in %c:\n%+%c%-\n\n", (9, "store"), (1, "expr"), - (18, "for_block"), + # Count from end, since COME_FROM shifts things in the forward direction + (-9, ("for_block", "pass")), + ), + "async_forelse_stmt36": ( + "%|async for %c in %c:\n%+%c%-%|else:\n%+%c%-\n\n", + (9, "store"), + (1, "expr"), + (-10, "for_block"), + (-2, "else_suite"), ), "call_ex": ("%c(%p)", (0, "expr"), (1, 100)), "except_return": ("%|except:\n%+%c%-", 3), @@ -73,6 +82,12 @@ def customize_for_version36(self, version): "ifstmtl": ("%|if %c:\n%+%c%-", (0, "testexpr"), (1, "_ifstmts_jumpl")), + + "list_afor": ( + " async for %[1]{%c} in %c%[1]{%c}", + (1, "store"), (0, "get_aiter"), (3, "list_iter"), + ), + "try_except36": ("%|try:\n%+%c%-%c\n\n", 1, -2), "tryfinally36": ("%|try:\n%+%c%-%|finally:\n%+%c%-\n\n", (1, "returns"), 3), "tryfinally_return_stmt": 
("%|try:\n%+%c%-%|finally:\n%+%|return%-\n\n", 1), @@ -81,7 +96,7 @@ def customize_for_version36(self, version): } ) - TABLE_R.update( + self.TABLE_R.update( { "CALL_FUNCTION_EX": ("%c(*%P)", 0, (1, 2, ", ", 100)), # Not quite right @@ -154,6 +169,9 @@ def n_classdef36(node): if node == "classdefdeco2": if isinstance(node[1][1].attr, str): class_name = node[1][1].attr + if self.is_pypy and class_name.find("") > 0: + class_name = class_name.split(".")[-1] + else: class_name = node[1][2].attr build_class = node @@ -173,7 +191,7 @@ def n_classdef36(node): code_node = build_class[1][1] else: code_node = build_class[1][0] - class_name = code_node.attr.co_name + class_name = get_code_name(code_node.attr) assert "mkfunc" == build_class[1] mkfunc = build_class[1] @@ -190,23 +208,24 @@ def n_classdef36(node): elif build_class[1][0] == "load_closure": # Python 3 with closures not functions load_closure = build_class[1] - if hasattr(load_closure[-3], "attr"): - # Python 3.3 classes with closures work like this. - # Note have to test before 3.2 case because - # index -2 also has an attr. 
- subclass_code = load_closure[-3].attr - elif hasattr(load_closure[-2], "attr"): - # Python 3.2 works like this - subclass_code = load_closure[-2].attr - else: - raise "Internal Error n_classdef: cannot find class body" + subclass_code = None + for i in range(-4, -1): + if load_closure[i] == "LOAD_CODE": + subclass_code = load_closure[i].attr + break + if subclass_code is None: + raise RuntimeError( + "Internal Error n_classdef: cannot find " "class body" + ) if hasattr(build_class[3], "__len__"): if not subclass_info: subclass_info = build_class[3] elif hasattr(build_class[2], "__len__"): subclass_info = build_class[2] else: - raise "Internal Error n_classdef: cannot superclass name" + raise RuntimeError( + "Internal Error n_classdef: cannot " "superclass name" + ) elif node == "classdefdeco2": subclass_info = node subclass_code = build_class[1][0].attr @@ -257,7 +276,7 @@ def call_ex_kw(node): if value == "": fmt = "%c(%p)" else: - fmt = "%%c(%s, %%p)" % value + fmt = "%c" + ("(%s, " % value).replace("%", "%%") + "%p)" self.template_engine( (fmt, (0, "expr"), (2, "build_map_unpack_with_call", 100)), node @@ -276,7 +295,7 @@ def call_ex_kw2(node): if value == "": fmt = "%c(%p)" else: - fmt = "%%c(%s, %%p)" % value + fmt = "%c" + ("(%s, " % value).replace("%", "%%") + "%p)" self.template_engine( (fmt, (0, "expr"), (2, "build_map_unpack_with_call", 100)), node @@ -380,7 +399,7 @@ def format_pos_args(node): def call36_tuple(node): """ - A tuple used in a call, these are like normal tuples but they + A tuple used in a call; these are like normal tuples, but they don't have the enclosing parenthesis. """ assert node == "tuple" @@ -655,7 +674,7 @@ def n_joined_str(node): else: # {{ and }} in Python source-code format strings mean # { and } respectively. But only when *not* part of a - # formatted value. However in the LOAD_STR + # formatted value. However, in the LOAD_STR # bytecode, the escaping of the braces has been # removed. 
So we need to put back the braces escaping in # reconstructing the source. @@ -680,6 +699,17 @@ def n_joined_str(node): self.n_joined_str = n_joined_str + def n_list_comp_async(node): + self.write("[") + if node[0].kind == "load_closure": + self.listcomp_closure3(node) + else: + self.comprehension_walk_newer(node, iter_index=3, code_index=0) + self.write("]") + self.prune() + + self.n_list_comp_async = n_list_comp_async + # def kwargs_only_36(node): # keys = node[-1].attr # num_kwargs = len(keys) diff --git a/uncompyle6/semantics/customize37.py b/uncompyle6/semantics/customize37.py index 976c340bb..60210800a 100644 --- a/uncompyle6/semantics/customize37.py +++ b/uncompyle6/semantics/customize37.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022 by Rocky Bernstein +# Copyright (c) 2019-2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -16,16 +16,16 @@ """ import re -from uncompyle6.semantics.consts import ( - PRECEDENCE, - TABLE_DIRECT, - INDENT_PER_LEVEL, -) +from uncompyle6.semantics.consts import INDENT_PER_LEVEL, PRECEDENCE from uncompyle6.semantics.helper import flatten_list +# FIXME get from a newer xdis +FSTRING_CONVERSION_MAP = {1: "!s", 2: "!r", 3: "!a", "X": ":X"} + + ####################### -def customize_for_version37(self, version): +def customize_for_version37(self, version: tuple): ######################## # Python 3.7+ changes ####################### @@ -39,20 +39,25 @@ def customize_for_version37(self, version): PRECEDENCE["call_ex_kw4"] = 1 PRECEDENCE["call_kw"] = 0 PRECEDENCE["call_kw36"] = 1 - PRECEDENCE["formatted_value1"] = 100 + PRECEDENCE["formatted_value1"] = 38 # f"...". 
This has to be below "named_expr" to make + # f'{(x := 10)}' preserve parenthesis + PRECEDENCE["formatted_value2"] = 38 # See above PRECEDENCE["if_exp_37a"] = 28 PRECEDENCE["if_exp_37b"] = 28 PRECEDENCE["dict_unpack"] = 0 # **{...} # fmt: on - TABLE_DIRECT.update( + self.TABLE_DIRECT.update( { "and_not": ("%c and not %c", (0, "expr"), (2, "expr")), "ann_assign": ( - "%|%[2]{attr}: %c\n", 0, + "%|%[2]{attr}: %c\n", + 0, ), "ann_assign_init": ( - "%|%[2]{attr}: %c = %c\n", 0, 1, + "%|%[2]{attr}: %c = %c\n", + 0, + 1, ), "async_for_stmt": ( "%|async for %c in %c:\n%+%c%-\n\n", @@ -62,9 +67,9 @@ def customize_for_version37(self, version): ), "async_for_stmt37": ( "%|async for %c in %c:\n%+%c%-\n\n", - (7, "store"), + (8, "store"), (1, "expr"), - (16, "for_block"), + (17, ("for_block", "pass")), ), "async_with_stmt": ("%|async with %c:\n%+%c%-", (0, "expr"), 3), "async_with_as_stmt": ( @@ -75,62 +80,69 @@ def customize_for_version37(self, version): ), "async_forelse_stmt": ( "%|async for %c in %c:\n%+%c%-%|else:\n%+%c%-\n\n", - (7, "store"), + (8, "store"), (1, "expr"), - (17, "for_block"), - (25, "else_suite"), + (-10, "for_block"), + (-2, "else_suite"), ), "attribute37": ("%c.%[1]{pattr}", (0, "expr")), - "attributes37": ("%[0]{pattr} import %c", - (0, "IMPORT_NAME_ATTR"), - (1, "IMPORT_FROM")), - + "attributes37": ( + "%[0]{pattr} import %c", + (0, "IMPORT_NAME_ATTR"), + (1, "IMPORT_FROM"), + ), # nested await expressions like: # return await (await bar()) # need parenthesis. 
# Note there are async dictionary expressions are like await expr's # the below is just the default fersion - "await_expr": ("await %p", (0, PRECEDENCE["await_expr"]-1)), - + "await_expr": ("await %p", (0, PRECEDENCE["await_expr"] - 1)), "await_stmt": ("%|%c\n", 0), "c_async_with_stmt": ("%|async with %c:\n%+%c%-", (0, "expr"), 3), "call_ex": ("%c(%p)", (0, "expr"), (1, 100)), - "compare_chained1a_37": ( + "compared_chained_middlea_37": ( ' %[3]{pattr.replace("-", " ")} %p %p', (0, PRECEDENCE["compare"] - 1), (-4, PRECEDENCE["compare"] - 1), ), - "compare_chained1_false_37": ( + "compared_chained_middle_false_37": ( ' %[3]{pattr.replace("-", " ")} %p %p', (0, PRECEDENCE["compare"] - 1), (-4, PRECEDENCE["compare"] - 1), ), - "compare_chained2_false_37": ( + "compare_chained_right_false_37": ( ' %[3]{pattr.replace("-", " ")} %p %p', (0, PRECEDENCE["compare"] - 1), (-5, PRECEDENCE["compare"] - 1), ), - "compare_chained1b_false_37": ( + "compared_chained_middleb_false_37": ( ' %[3]{pattr.replace("-", " ")} %p %p', (0, PRECEDENCE["compare"] - 1), (-4, PRECEDENCE["compare"] - 1), ), - "compare_chained1c_37": ( + "compared_chained_middlec_37": ( ' %[3]{pattr.replace("-", " ")} %p %p', (0, PRECEDENCE["compare"] - 1), (-2, PRECEDENCE["compare"] - 1), ), - "compare_chained2a_37": ('%[1]{pattr.replace("-", " ")} %p', (0, PRECEDENCE["compare"] - 1)), - "compare_chained2b_false_37": ('%[1]{pattr.replace("-", " ")} %p', (0, PRECEDENCE["compare"] - 1)), - "compare_chained2a_false_37": ('%[1]{pattr.replace("-", " ")} %p', (0, PRECEDENCE["compare"] - 1)), - "compare_chained2c_37": ( + "compare_chained_righta_37": ( + '%[1]{pattr.replace("-", " ")} %p', + (0, PRECEDENCE["compare"] - 1), + ), + "compare_chained_rightb_false_37": ( + '%[1]{pattr.replace("-", " ")} %p', + (0, PRECEDENCE["compare"] - 1), + ), + "compare_chained_righta_false_37": ( + '%[1]{pattr.replace("-", " ")} %p', + (0, PRECEDENCE["compare"] - 1), + ), + "compare_chained_rightc_37": ( '%[3]{pattr.replace("-", " ")} 
%p %p', (0, PRECEDENCE["compare"] - 1), (6, PRECEDENCE["compare"] - 1), ), - 'if_exp37': ( '%p if %c else %c', - (1, 'expr', 27), 0, 3 ), - + "if_exp37": ("%p if %c else %c", (1, "expr", 27), 0, 3), "except_return": ("%|except:\n%+%c%-", 3), "if_exp_37a": ( "%p if %p else %p", @@ -145,13 +157,17 @@ def customize_for_version37(self, version): (5, "expr", 27), ), "ifstmtl": ("%|if %c:\n%+%c%-", (0, "testexpr"), (1, "_ifstmts_jumpl")), - 'import_as37': ( '%|import %c as %c\n', 2, -2), + "import_as37": ("%|import %c as %c\n", 2, -2), + "import_from37": ("%|from %[2]{pattr} import %c\n", (3, "importlist37")), "import_from_as37": ( "%|from %c as %c\n", (2, "import_from_attr37"), (3, "store"), ), - "import_one": ("%c", (0, "importlists"),), + "import_one": ( + "%c", + (0, "importlists"), + ), "importattr37": ("%c", (0, "IMPORT_NAME_ATTR")), "import_from_attr37": ( "%c import %c", @@ -160,19 +176,15 @@ def customize_for_version37(self, version): ), "list_afor": ( " async for %[1]{%c} in %c%[1]{%c}", - (1, "store"), (0, "get_aiter"), (3, "list_iter"), - ), - - "list_afor": ( - " async for %[1]{%c} in %c%[1]{%c}", - (1, "store"), (0, "get_aiter"), (3, "list_iter"), + (1, "store"), + (0, "get_aiter"), + (3, "list_iter"), ), - "list_if37": (" if %p%c", (0, 27), 1), "list_if37_not": (" if not %p%c", (0, 27), 1), "testfalse_not_or": ("not %c or %c", (0, "expr"), (2, "expr")), "testfalse_not_and": ("not (%c)", 0), - "testfalsel": ("not %c", (0, "expr")), + "testfalsel": ("not %c", (0, "expr")), "try_except36": ("%|try:\n%+%c%-%c\n\n", 1, -2), "tryfinally36": ("%|try:\n%+%c%-%|finally:\n%+%c%-\n\n", (1, "returns"), 3), "dict_unpack": ("{**%C}", (0, -1, ", **")), @@ -210,12 +222,12 @@ def n_assert_invert(node): def n_async_call(node): self.f.write("async ") - node.kind == "call" + node.kind = "call" p = self.prec self.prec = 80 self.template_engine(("%c(%P)", 0, (1, -4, ", ", 100)), node) self.prec = p - node.kind == "async_call" + node.kind = "async_call" self.prune() 
self.n_async_call = n_async_call @@ -252,6 +264,8 @@ def n_build_list_unpack(node): if lastnodetype.startswith("BUILD_LIST"): self.write("[") endchar = "]" + else: + endchar = "" flat_elems = flatten_list(node) @@ -403,12 +417,17 @@ def n_call(node): self.n_call = n_call def n_compare_chained(node): - if node[0] == "compare_chained37": + if node[0] in ( + "c_compare_chained37", + "c_compare_chained37_false", + "compare_chained37", + "compare_chained37_false", + ): self.default(node[0]) else: self.default(node) - self.n_compare_chained = n_compare_chained + self.n_compare_chained = self.n_c_compare_chained = n_compare_chained def n_importlist37(node): if len(node) == 1: @@ -434,3 +453,26 @@ def n_list_comp_async(node): self.prune() self.n_list_comp_async = n_list_comp_async + + # FIXME: The following adjusts I guess a bug in the parser. + # It might be as simple as renaming grammar symbol "testtrue" to "testtrue_or_false" + # and then keeping this as is with the name change. + # Fixing in the parsing by inspection is harder than doing it here. 
+ def n_testtrue(node): + compare_chained37 = node[0] + if ( + compare_chained37 == "compare_chained37" + and compare_chained37[1] == "compared_chained_middleb_37" + ): + compared_chained_middleb_37 = compare_chained37[1] + if ( + len(compared_chained_middleb_37) > 2 + and compared_chained_middleb_37[-2] == "JUMP_FORWARD" + ): + node.kind = "testfalse" + pass + pass + self.default(node) + return + + self.n_testtrue = n_testtrue diff --git a/uncompyle6/semantics/customize38.py b/uncompyle6/semantics/customize38.py index 843ecf1cc..5b3684bbb 100644 --- a/uncompyle6/semantics/customize38.py +++ b/uncompyle6/semantics/customize38.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020 by Rocky Bernstein +# Copyright (c) 2019-2020, 2022, 2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -20,138 +20,342 @@ ####################### from uncompyle6.semantics.consts import PRECEDENCE, TABLE_DIRECT +from uncompyle6.semantics.customize37 import FSTRING_CONVERSION_MAP +from uncompyle6.semantics.helper import escape_string, strip_quotes -def customize_for_version38(self, version): +def customize_for_version38(self, version: tuple): # FIXME: pytest doesn't add proper keys in testing. Reinstate after we have fixed pytest. 
# for lhs in 'for forelsestmt forelselaststmt ' - # 'forelselaststmtl tryfinally38'.split(): + # 'forelselaststmtc tryfinally38'.split(): # del TABLE_DIRECT[lhs] + self.TABLE_DIRECT.update( + { + "async_for_stmt38": ( + "%|async for %c in %c:\n%+%c%-%-\n\n", + (2, "store"), + (0, "expr"), + (3, ("for_block", "pass")), + ), + "async_forelse_stmt38": ( + "%|async for %c in %c:\n%+%c%-%|else:\n%+%c%-\n\n", + (7, "store"), + (0, "expr"), + (8, "for_block"), + (-1, "else_suite"), + ), + "async_with_stmt38": ( + "%|async with %c:\n%+%c%-\n", + (0, "expr"), + (7, ("l_stmts_opt", "l_stmts", "pass")), + ), + "async_with_as_stmt38": ( + "%|async with %c as %c:\n%+%|%c%-", + (0, "expr"), + (6, "store"), + (7, "suite_stmts"), + ), + "c_forelsestmt38": ( + "%|for %c in %c:\n%+%c%-%|else:\n%+%c%-\n\n", + (2, "store"), + (0, "expr"), + (3, "for_block"), + -1, + ), + "c_tryfinallystmt38": ( + "%|try:\n%+%c%-%|finally:\n%+%c%-\n\n", + (1, "c_suite_stmts_opt"), + (-2, "c_suite_stmts_opt"), + ), + # Python 3.8 reverses the order of keys and items + # from all prior versions of Python. 
+ "dict_comp_body": ( + "%c: %c", + (0, "expr"), + (1, "expr"), + ), + "except_cond1a": ( + "%|except %c:\n", + (1, "expr"), + ), + "except_cond_as": ( + "%|except %c as %c:\n", + (1, "expr"), + (-2, "STORE_FAST"), + ), + "except_handler38": ("%c", (2, "except_stmts")), + "except_handler38a": ("%c", (-2, "stmts")), + "except_handler38c": ( + "%c%+%c%-", + (1, "except_cond1a"), + (2, "except_stmts"), + ), + "except_handler_as": ( + "%c%+\n%+%c%-", + (1, "except_cond_as"), + (2, "tryfinallystmt"), + ), + "except_ret38a": ("return %c", (4, "expr")), + # Note: there is a suite_stmts_opt which seems + # to be bookkeeping which is not expressed in source code + "except_ret38": ("%|return %c\n", (1, "expr")), + "for38": ( + "%|for %c in %c:\n%+%c%-\n\n", + (2, "store"), + (0, "expr"), + (3, "for_block"), + ), + "forelsestmt38": ( + "%|for %c in %c:\n%+%c%-%|else:\n%+%c%-\n\n", + (2, "store"), + (0, "expr"), + (3, "for_block"), + -1, + ), + "forelselaststmt38": ( + "%|for %c in %c:\n%+%c%-%|else:\n%+%c%-", + (2, "store"), + (0, "expr"), + (3, "for_block"), + -2, + ), + "forelselaststmtc38": ( + "%|for %c in %c:\n%+%c%-%|else:\n%+%c%-\n\n", + (2, "store"), + (0, "expr"), + (3, "for_block"), + -2, + ), + "ifpoplaststmtc": ("%|if %c:\n%+%c%-", (0, "testexpr"), (2, "l_stmts")), + "named_expr": ( # AKA "walrus operator" + "%c := %p", + (2, "store"), + (0, "expr", PRECEDENCE["named_expr"] - 1), + ), + "pop_return": ("%|return %c\n", (1, "return_expr")), + "popb_return": ("%|return %c\n", (0, "return_expr")), + "pop_ex_return": ("%|return %c\n", (0, "return_expr")), + "set_for": ( + " for %c in %c", + (2, "store"), + (0, "expr_or_arg"), + ), + "whilestmt38": ( + "%|while %c:\n%+%c%-\n\n", + (1, ("bool_op", "testexpr", "testexprc")), + (2, ("_stmts", "l_stmts", "l_stmts_opt", "pass")), + ), + "whileTruestmt38": ( + "%|while True:\n%+%c%-\n\n", + (1, ("l_stmts", "pass")), + ), + "try_elsestmtl38": ( + "%|try:\n%+%c%-%c%|else:\n%+%c%-", + (1, "suite_stmts_opt"), + (3, 
"except_handler38"), + (5, "else_suitel"), + ), + "try_except38": ( + "%|try:\n%+%c\n%-%|except:\n%+%c%-\n\n", + (2, ("suite_stmts_opt", "suite_stmts")), + (3, ("except_handler38a", "except_handler38b", "except_handler38c")), + ), + "try_except38r": ( + "%|try:\n%+%c\n%-%|except:\n%+%c%-\n\n", + (1, "return_except"), + (2, "except_handler38b"), + ), + "try_except38r2": ( + "%|try:\n%+%c\n%-%|except:\n%+%c%c%-\n\n", + (1, "suite_stmts_opt"), + (8, "cond_except_stmts_opt"), + (10, "return"), + ), + "try_except38r4": ( + "%|try:\n%+%c\n%-%|except:\n%+%c%c%-\n\n", + (1, "returns_in_except"), + (3, "except_cond1"), + (4, "return"), + ), + "try_except_as": ( + "%|try:\n%+%c%-\n%|%-%c\n\n", + ( + -4, + ("suite_stmts", "_stmts"), + ), # Go from the end because of POP_BLOCK variation + (-3, "except_handler_as"), + ), + "try_except_ret38": ( + "%|try:\n%+%c%-\n%|except:\n%+%|%c%-\n\n", + (1, "returns"), + (2, "except_ret38a"), + ), + "try_except_ret38a": ( + "%|try:\n%+%c%-%c\n\n", + (1, "returns"), + (2, "except_handler38c"), + ), + "tryfinally38rstmt": ( + "%|try:\n%+%c%-%|finally:\n%+%c%-\n\n", + (0, "sf_pb_call_returns"), + (-1, ("ss_end_finally", "suite_stmts", "_stmts")), + ), + "tryfinally38rstmt2": ( + "%|try:\n%+%c%-%|finally:\n%+%c%-\n\n", + (4, "returns"), + -2, + "ss_end_finally", + ), + "tryfinally38rstmt3": ( + "%|try:\n%+%|return %c%-\n%|finally:\n%+%c%-\n\n", + (1, "expr"), + (-1, "ss_end_finally"), + ), + "tryfinally38rstmt4": ( + "%|try:\n%+%c%-\n%|finally:\n%+%c%-\n\n", + (1, "suite_stmts_opt"), + (5, "suite_stmts_return"), + ), + "tryfinally38stmt": ( + "%|try:\n%+%c%-%|finally:\n%+%c%-\n\n", + (1, "suite_stmts_opt"), + (6, "suite_stmts_opt"), + ), + "tryfinally38astmt": ( + "%|try:\n%+%c%-%|finally:\n%+%c%-\n\n", + (2, "suite_stmts_opt"), + (8, "suite_stmts_opt"), + ), + "with_as_pass": ( + "%|with %c as %c:\n%+%c%-", + (0, "expr"), + (2, "store"), + (3, "pass"), + ), + } + ) + + def except_return_value(node): + if node[0] == "POP_BLOCK": + 
self.default(node[1]) + else: + self.template_engine(("%|return %c\n", (0, "expr")), node) + self.prune() + + self.n_except_return_value = except_return_value + + # FIXME: now that we've split out cond_except_stmt, + # we should be able to get this working as a pure transformation rule, + # so no procedure is needed here. + def try_except38r3(node): + self.template_engine(("%|try:\n%+%c\n%-", (1, "suite_stmts_opt")), node) + cond_except_stmts_opt = node[5] + assert cond_except_stmts_opt == "cond_except_stmts_opt" + for child in cond_except_stmts_opt: + if child == "cond_except_stmt": + if child[0] == "except_cond1": + self.template_engine( + ("%c\n", (0, "except_cond1"), (1, "expr")), child + ) + self.template_engine(("%+%c%-\n", (1, "except_stmts")), child) + pass + pass + self.template_engine(("%+%c%-\n", (7, "return")), node) + self.prune() + + self.n_try_except38r3 = try_except38r3 + + def n_list_afor(node): + if len(node) == 2: + # list_afor ::= get_iter list_afor + self.comprehension_walk_newer(node, 0) + else: + list_iter_index = 2 if node[2] == "list_iter" else 3 + self.template_engine( + ( + " async for %[1]{%c} in %c%[1]{%c}", + (1, "store"), + (0, "get_aiter"), + (list_iter_index, "list_iter"), + ), + node, + ) + self.prune() + + self.n_list_afor = n_list_afor - TABLE_DIRECT.update({ - "async_for_stmt38": ( - "%|async for %c in %c:\n%+%c%-%-\n\n", - (2, "store"), (0, "expr"), (3, "for_block") ), - - 'async_forelse_stmt38': ( - '%|async for %c in %c:\n%+%c%-%|else:\n%+%c%-\n\n', - (7, 'store'), (0, 'expr'), (8, 'for_block'), (-1, 'else_suite') ), - - "async_with_stmt38": ( - "%|async with %c:\n%+%|%c%-", - (0, "expr"), 7), - - "async_with_as_stmt38": ( - "%|async with %c as %c:\n%+%|%c%-", - (0, "expr"), (6, "store"), - (7, "suite_stmts") - ), - - "except_cond_as": ( - "%|except %c as %c:\n", - (1, "expr"), - (-2, "STORE_FAST"), - ), - - 'except_handler38': ( - '%c', (2, 'except_stmts') ), - - 'except_handler38a': ( - '%c', (-2, 'stmts') ), - - 
"except_handler_as": ( - "%c%+\n%+%c%-", - (1, "except_cond_as"), - (2, "tryfinallystmt"), - ), - - 'except_ret38a': ( - 'return %c', (4, 'expr') ), - - # Note: there is a suite_stmts_opt which seems - # to be bookkeeping which is not expressed in source code - 'except_ret38': ( '%|return %c\n', (1, 'expr') ), - - 'for38': ( - '%|for %c in %c:\n%+%c%-\n\n', - (2, 'store'), - (0, 'expr'), - (3, 'for_block') ), - - "forelsestmt38": ( - "%|for %c in %c:\n%+%c%-%|else:\n%+%c%-\n\n", - (2, "store"), - (0, "expr"), - (3, "for_block"), -1 ), - - 'forelselaststmt38': ( - '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-', - (2, 'store'), - (0, 'expr'), - (3, 'for_block'), -2 ), - 'forelselaststmtl38': ( - '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-\n\n', - (2, 'store'), - (0, 'expr'), - (3, 'for_block'), -2 ), - - 'ifpoplaststmtl': ( '%|if %c:\n%+%c%-', - (0, "testexpr"), - (2, "c_stmts" ) ), - - 'ifstmtl': ( '%|if %c:\n%+%c%-', - (0, "testexpr"), - (1, "_ifstmts_jumpl") ), - - 'whilestmt38': ( '%|while %c:\n%+%c%-\n\n', - (1, 'testexpr'), - 2 ), # "l_stmts" or "pass" - 'whileTruestmt38': ( '%|while True:\n%+%c%-\n\n', - 1 ), # "l_stmts" or "pass" - 'try_elsestmtl38': ( - '%|try:\n%+%c%-%c%|else:\n%+%c%-', - (1, 'suite_stmts_opt'), - (3, 'except_handler38'), - (5, 'else_suitel') ), - 'try_except38': ( - '%|try:\n%+%c\n%-%|except:\n%|%-%c\n\n', - (-2, 'suite_stmts_opt'), (-1, 'except_handler38a') ), - - "try_except_as": ( - "%|try:\n%+%c%-\n%|%-%c\n\n", - (-4, "suite_stmts"), # Go from the end because of POP_BLOCK variation - (-3, "except_handler_as"), - ), - - "try_except_ret38": ( - "%|try:\n%+%c%-\n%|except:\n%+%|%c%-\n\n", - (1, "returns"), - (2, "except_ret38a"), - ), - 'tryfinally38rstmt': ( - '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n', - (0, "sf_pb_call_returns"), - (-1, ("ss_end_finally", "suite_stmts")), - ), - "tryfinally38rstmt2": ( - "%|try:\n%+%c%-%|finally:\n%+%c%-\n\n", - (4, "returns"), - -2, "ss_end_finally" - ), - "tryfinally38rstmt3": ( - "%|try:\n%+%|return 
%c%-\n%|finally:\n%+%c%-\n\n", - (1, "expr"), - (-1, "ss_end_finally") - ), - 'tryfinally38stmt': ( - '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n', - (1, "suite_stmts_opt"), - (6, "suite_stmts_opt") ), - 'tryfinally38astmt': ( - '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n', - (2, "suite_stmts_opt"), - (8, "suite_stmts_opt") ), - "named_expr": ( # AKA "walrus operator" - "%c := %p", (2, "store"), (0, "expr", PRECEDENCE["named_expr"]-1) + def n_set_afor(node): + if len(node) == 2: + self.template_engine( + (" async for %[1]{%c} in %c", (1, "store"), (0, "get_aiter")), node ) - }) + else: + self.template_engine( + " async for %[1]{%c} in %c%c", + (1, "store"), + (0, "get_aiter"), + (2, "set_iter"), + ) + self.prune() + + self.n_set_afor = n_set_afor + + def n_formatted_value_debug(node): + p = self.prec + self.prec = 100 + + formatted_value = node[1] + value_equal = node[0].attr + assert formatted_value.kind.startswith("formatted_value") + old_in_format_string = self.in_format_string + self.in_format_string = formatted_value.kind + format_value_attr = node[-1] + + post_str = "" + if node[-1] == "BUILD_STRING_3": + post_load_str = node[-2] + post_str = self.traverse(post_load_str, indent="") + post_str = strip_quotes(post_str) + + if format_value_attr == "FORMAT_VALUE_ATTR": + attr = format_value_attr.attr + if attr & 4: + fmt = strip_quotes(self.traverse(node[3], indent="")) + attr_flags = attr & 3 + if attr_flags: + conversion = "%s:%s" % ( + FSTRING_CONVERSION_MAP.get(attr_flags, ""), + fmt, + ) + else: + conversion = ":%s" % fmt + else: + conversion = FSTRING_CONVERSION_MAP.get(attr, "") + f_str = "f%s" % escape_string( + "{%s%s}%s" % (value_equal, conversion, post_str) + ) + else: + f_conversion = self.traverse(formatted_value, indent="") + # Remove leaving "f" and quotes + conversion = strip_quotes(f_conversion[1:]) + f_str = "f%s" % escape_string(f"{value_equal}{conversion}" + post_str) + + self.write(f_str) + self.in_format_string = old_in_format_string + + self.prec = p + 
self.prune() + + self.n_formatted_value_debug = n_formatted_value_debug + + def n_suite_stmts_return(node): + if len(node) > 1: + assert len(node) == 2 + self.template_engine( + ("%c\n%|return %c", (0, ("_stmts", "suite_stmts")), (1, "expr")), node + ) + else: + self.template_engine(("%|return %c", (0, "expr")), node) + self.prune() + + self.n_suite_stmts_return = n_suite_stmts_return diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index dc4e8e47b..7610b554e 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2019, 2021-2022 by Rocky Bernstein +# Copyright (c) 2015-2019, 2021-2025 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -63,38 +63,37 @@ # FIXME: DRY code with pysource -from __future__ import print_function - import re +from bisect import bisect_right +from collections import namedtuple +from typing import Optional -from uncompyle6.semantics import pysource -from uncompyle6 import parser -from uncompyle6.scanner import Token, Code, get_scanner -import uncompyle6.parser as python_parser -from uncompyle6.semantics.check_ast import checker - -from uncompyle6.show import maybe_show_asm, maybe_show_tree - -from uncompyle6.parsers.treenode import SyntaxTree - -from uncompyle6.semantics.pysource import ParserError, StringIO +from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG +from spark_parser.ast import GenericASTTraversalPruningException from xdis import iscode from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE +from uncompyle6.parser import ParserError as ParserError, parse +from uncompyle6.parsers.treenode import SyntaxTree +from uncompyle6.scanner import Code, Token, get_scanner +from uncompyle6.semantics.check_ast import checker from uncompyle6.semantics.consts import ( INDENT_PER_LEVEL, NONE, + PASS, PRECEDENCE, 
TABLE_DIRECT, escape, - MAP, - PASS, ) - -from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG -from spark_parser.ast import GenericASTTraversalPruningException - -from collections import namedtuple +from uncompyle6.semantics.helper import find_code_node +from uncompyle6.semantics.pysource import ( + DEFAULT_DEBUG_OPTS, + TREE_DEFAULT_DEBUG, + SourceWalker, + StringIO, + find_globals_and_nonlocals, +) +from uncompyle6.show import maybe_show_asm, maybe_show_tree NodeInfo = namedtuple("NodeInfo", "node start finish") ExtractInfo = namedtuple( @@ -148,23 +147,23 @@ } -class FragmentsWalker(pysource.SourceWalker, object): - +class FragmentsWalker(SourceWalker, object): MAP_DIRECT_FRAGMENT = () stacked_params = ("f", "indent", "is_lambda", "_globals") def __init__( self, - version, + version: tuple, scanner, - showast=False, + showast=TREE_DEFAULT_DEBUG, debug_parser=PARSER_DEFAULT_DEBUG, compile_mode="exec", - is_pypy=False, + is_pypy=IS_PYPY, + linestarts={}, tolerate_errors=True, ): - pysource.SourceWalker.__init__( + SourceWalker.__init__( self, version=version, out=StringIO(), @@ -173,6 +172,7 @@ def __init__( debug_parser=debug_parser, compile_mode=compile_mode, is_pypy=is_pypy, + linestarts=linestarts, tolerate_errors=tolerate_errors, ) @@ -189,8 +189,7 @@ def __init__( self.is_pypy = is_pypy # FIXME: is there a better way? 
- global MAP_DIRECT_FRAGMENT - MAP_DIRECT_FRAGMENT = (dict(TABLE_DIRECT, **TABLE_DIRECT_FRAGMENT),) + self.MAP_DIRECT_FRAGMENT = (dict(TABLE_DIRECT, **TABLE_DIRECT_FRAGMENT),) return f = property( @@ -238,7 +237,7 @@ def set_pos_info(self, node, start, finish, name=None): def preorder(self, node=None): start = len(self.f.getvalue()) - super(pysource.SourceWalker, self).preorder(node) + super(SourceWalker, self).preorder(node) self.set_pos_info(node, start, len(self.f.getvalue())) return @@ -346,7 +345,6 @@ def n_return(self, node): self.prune() # stop recursing def n_return_if_stmt(self, node): - start = len(self.f.getvalue()) + len(self.indent) if self.params["is_lambda"]: node[0].parent = node @@ -598,17 +596,7 @@ def n_alias(self, node): def n_mkfunc(self, node): start = len(self.f.getvalue()) - if self.version >= (3, 3) or node[-2] == "kwargs": - # LOAD_CONST code object .. - # LOAD_CONST 'x0' if >= 3.3 - # MAKE_FUNCTION .. - code_node = node[-3] - elif node[-2] == "expr": - code_node = node[-2][0] - else: - # LOAD_CONST code object .. - # MAKE_FUNCTION .. - code_node = node[-2] + code_node = find_code_node(node, -2) func_name = code_node.attr.co_name self.write(func_name) self.set_pos_info(code_node, start, len(self.f.getvalue())) @@ -656,6 +644,17 @@ def comprehension_walk(self, node, iter_index, code_index=-5): code = Code(cn.attr, self.scanner, self.currentclass) ast = self.build_ast(code._tokens, code._customize, code) + + self.MAP_DIRECT = (self.TABLE_DIRECT,) + self.MAP_R = (self.TABLE_R, -1) + + self.MAP = { + "stmt": self.MAP_R, + "call": self.MAP_R, + "delete": self.MAP_R, + "store": self.MAP_R, + } + self.customize(code._customize) # Remove single reductions as in ("stmts", "sstmt"): @@ -664,10 +663,10 @@ def comprehension_walk(self, node, iter_index, code_index=-5): n = ast[iter_index] - assert n == "comp_iter" + assert n == "comp_iter", n.kind # Find the comprehension body. It is the inner-most # node that is not list_.. . 
- while n == "comp_iter": # list_iter + while n == "comp_iter": # list_iter n = n[0] # recurse one step if n == "comp_for": if n[0] == "SETUP_LOOP": @@ -723,7 +722,7 @@ def comprehension_walk3(self, node, iter_index, code_index=-5): assert iscode(code), node[code_index] code_name = code.co_name - code = Code(code, self.scanner, self.currentclass) + code = Code(code, self.scanner, self.currentclass, self.debug_opts["asm"]) ast = self.build_ast(code._tokens, code._customize, code) @@ -1070,13 +1069,17 @@ def n_classdef(self, node): # Python 3.2 works like this subclass = load_closure[-2].attr else: - raise "Internal Error n_classdef: cannot find class body" + raise RuntimeError( + "Internal Error n_classdef: cannot find class body" + ) if hasattr(buildclass[3], "__len__"): subclass_info = buildclass[3] elif hasattr(buildclass[2], "__len__"): subclass_info = buildclass[2] else: - raise "Internal Error n_classdef: cannot superclass name" + raise RuntimeError( + "Internal Error n_classdef: cannot superclass name" + ) else: subclass = buildclass[1][0].attr subclass_info = node[0] @@ -1123,8 +1126,15 @@ def n_classdef(self, node): n_classdefdeco2 = n_classdef - def gen_source(self, ast, name, customize, is_lambda=False, returnNone=False, - debug_opts=None): + def gen_source( + self, + ast, + name, + customize, + is_lambda=False, + returnNone=False, + debug_opts=DEFAULT_DEBUG_OPTS, + ): """convert parse tree to Python source code""" rn = self.return_none @@ -1150,7 +1160,6 @@ def build_ast( noneInNames=False, is_top_level_module=False, ): - # FIXME: DRY with pysource.py # assert isinstance(tokens[0], Token) @@ -1168,12 +1177,15 @@ def build_ast( p_insts = self.p.insts self.p.insts = self.scanner.insts self.p.offset2inst_index = self.scanner.offset2inst_index - ast = python_parser.parse(self.p, tokens, customize, code) + ast = parse(self.p, tokens, customize, code) + self.customize(customize) self.p.insts = p_insts - except (python_parser.ParserError, AssertionError) as e: 
+ + except (ParserError, AssertionError) as e: raise ParserError(e, tokens) + transform_tree = self.treeTransform.transform(ast, code) maybe_show_tree(self, ast) - return ast + return transform_tree # The bytecode for the end of the main routine has a # "return None". However you can't issue a "return" statement in @@ -1199,23 +1211,28 @@ def build_ast( if len(tokens) == 0: return PASS - # Build parse tree from tokenized and massaged disassembly. + # Build a parse tree from tokenized and massaged disassembly. try: # FIXME: have p.insts update in a better way # modularity is broken here p_insts = self.p.insts self.p.insts = self.scanner.insts self.p.offset2inst_index = self.scanner.offset2inst_index - ast = parser.parse(self.p, tokens, customize, code) + self.p.opc = self.scanner.opc + ast = parse(self.p, tokens, customize, code) self.p.insts = p_insts - except (parser.ParserError, AssertionError) as e: + except (ParserError, AssertionError) as e: raise ParserError(e, tokens, {}) - maybe_show_tree(self, ast) - checker(ast, False, self.ast_errors) - return ast + self.customize(customize) + transform_tree = self.treeTransform.transform(ast, code) + + maybe_show_tree(self, ast) + + del ast # Save memory + return transform_tree # FIXME: we could provide another customized routine # that fixes up parents along a particular path to a node that @@ -1345,7 +1362,7 @@ def extract_node_info(self, nodeInfo): selectedText = text[start:finish] # Compute offsets relative to the beginning of the - # line rather than the beinning of the text + # line rather than the beginning of the text. try: lineStart = text[:start].rindex("\n") + 1 except ValueError: @@ -1353,7 +1370,7 @@ def extract_node_info(self, nodeInfo): adjustedStart = start - lineStart # If selected text is greater than a single line - # just show the first line plus elipses. + # just show the first line plus ellipsis. 
lines = selectedText.split("\n") if len(lines) > 1: adjustedEnd = len(lines[0]) - adjustedStart @@ -1426,7 +1443,7 @@ def extract_parent_info(self, node): p = node.parent orig_parent = p # If we can get different text, use that as the parent, - # otherwise we'll use the immeditate parent + # otherwise we'll use the immediatate parent. while p and ( hasattr(p, "parent") and p.start == node.start and p.finish == node.finish ): @@ -1455,12 +1472,11 @@ def print_super_classes(self, node): self.set_pos_info(node, start, len(self.f.getvalue())) def print_super_classes3(self, node): - # FIXME: wrap superclasses onto a node # as a custom rule start = len(self.f.getvalue()) n = len(node) - 1 - + j = 0 if node.kind != "expr": if node == "kwarg": self.template_engine(("(%[0]{attr}=%c)", 1), node) @@ -1474,7 +1490,7 @@ def print_super_classes3(self, node): # FIXME: this doesn't handle positional and keyword args # properly. Need to do something more like that below # in the non-PYPY 3.6 case. - self.template_engine(('(%[0]{attr}=%c)', 1), node[n-1]) + self.template_engine(("(%[0]{attr}=%c)", 1), node[n - 1]) return else: kwargs = node[n - 1].attr @@ -1504,9 +1520,9 @@ def print_super_classes3(self, node): self.write("(") if kwargs: # Last arg is tuple of keyword values: omit - l = n - 1 + m = n - 1 else: - l = n + m = n if kwargs: # 3.6+ does this @@ -1518,7 +1534,7 @@ def print_super_classes3(self, node): j += 1 j = 0 - while i < l: + while i < m: self.write(sep) value = self.traverse(node[i]) self.write("%s=%s" % (kwargs[j], value)) @@ -1526,7 +1542,7 @@ def print_super_classes3(self, node): j += 1 i += 1 else: - while i < l: + while i < m: value = self.traverse(node[i]) i += 1 self.write(sep, value) @@ -1564,19 +1580,19 @@ def n_dict(self, node): if node[0].kind.startswith("kvlist"): # Python 3.5+ style key/value list in dict kv_node = node[0] - l = list(kv_node) - length = len(l) + ll = list(kv_node) + length = len(ll) if kv_node[-1].kind.startswith("BUILD_MAP"): length -= 
1 i = 0 while i < length: self.write(sep) - name = self.traverse(l[i], indent="") - l[i].parent = kv_node - l[i + 1].parent = kv_node + name = self.traverse(ll[i], indent="") + ll[i].parent = kv_node + ll[i + 1].parent = kv_node self.write(name, ": ") value = self.traverse( - l[i + 1], indent=self.indent + (len(name) + 2) * " " + ll[i + 1], indent=self.indent + (len(name) + 2) * " " ) self.write(sep, name, ": ", value) sep = line_seperator @@ -1586,25 +1602,25 @@ def n_dict(self, node): elif len(node) > 1 and node[1].kind.startswith("kvlist"): # Python 3.0..3.4 style key/value list in dict kv_node = node[1] - l = list(kv_node) - if len(l) > 0 and l[0].kind == "kv3": + ll = list(kv_node) + if len(ll) > 0 and ll[0].kind == "kv3": # Python 3.2 does this kv_node = node[1][0] - l = list(kv_node) + ll = list(kv_node) i = 0 - while i < len(l): - l[i].parent = kv_node - l[i + 1].parent = kv_node + while i < len(ll): + ll[i].parent = kv_node + ll[i + 1].parent = kv_node key_start = len(self.f.getvalue()) + len(sep) - name = self.traverse(l[i + 1], indent="") + name = self.traverse(ll[i + 1], indent="") key_finish = key_start + len(name) val_start = key_finish + 2 value = self.traverse( - l[i], indent=self.indent + (len(name) + 2) * " " + ll[i], indent=self.indent + (len(name) + 2) * " " ) self.write(sep, name, ": ", value) - self.set_pos_info_recurse(l[i + 1], key_start, key_finish) - self.set_pos_info_recurse(l[i], val_start, val_start + len(value)) + self.set_pos_info_recurse(ll[i + 1], key_start, key_finish) + self.set_pos_info_recurse(ll[i], val_start, val_start + len(value)) sep = line_seperator i += 3 pass @@ -1777,13 +1793,13 @@ def n_list(self, node): n_set = n_tuple = n_build_set = n_list def template_engine(self, entry, startnode): - """The format template interpetation engine. See the comment at the - beginning of this module for the how we interpret format + """The format template interpretation engine. 
See the comment at the + beginning of this module for how we interpret format specifications such as %c, %C, and so on. """ # print("-----") - # print(startnode) + # print(startnode.kind) # print(entry[0]) # print('======') @@ -1808,7 +1824,7 @@ def template_engine(self, entry, startnode): if m.group("child"): node = node[int(m.group("child"))] node.parent = startnode - except: + except Exception: print(node.__dict__) raise @@ -1838,10 +1854,27 @@ def template_engine(self, entry, startnode): index = entry[arg] if isinstance(index, tuple): - assert node[index[0]] == index[1], ( - "at %s[%d], expected %s node; got %s" - % (node.kind, arg, node[index[0]].kind, index[1]) - ) + if isinstance(index[1], str): + # if node[index[0]] != index[1]: + # from trepan.api import debug; debug() + assert ( + node[index[0]] == index[1] + ), "at %s[%d], expected '%s' node; got '%s'" % ( + node.kind, + arg, + index[1], + node[index[0]].kind, + ) + else: + assert ( + node[index[0]] in index[1] + ), "at %s[%d], expected to be in '%s' node; got '%s'" % ( + node.kind, + arg, + index[1], + node[index[0]].kind, + ) + index = index[0] assert isinstance( index, int @@ -1861,10 +1894,21 @@ def template_engine(self, entry, startnode): assert isinstance(tup, tuple) if len(tup) == 3: (index, nonterm_name, self.prec) = tup - assert node[index] == nonterm_name, ( - "at %s[%d], expected '%s' node; got '%s'" - % (node.kind, arg, nonterm_name, node[index].kind) - ) + if isinstance(tup[1], str): + assert ( + node[index] == nonterm_name + ), "at %s[%d], expected '%s' node; got '%s'" % ( + node.kind, + arg, + nonterm_name, + node[index].kind, + ) + else: + assert node[tup[0]] in tup[1], ( + f"at {node.kind}[{tup[0]}], expected to be in '{tup[1]}' " + f"node; got '{node[tup[0]].kind}'" + ) + else: assert len(tup) == 2 (index, self.prec) = entry[arg] @@ -1937,7 +1981,7 @@ def template_engine(self, entry, startnode): start = len(self.f.getvalue()) self.write(eval(expr, d, d)) self.set_pos_info(node, start, 
len(self.f.getvalue())) - except: + except Exception: print(node) raise m = escape.search(fmt, i) @@ -1952,15 +1996,14 @@ def template_engine(self, entry, startnode): # FIXME figure out how to get these cases to be table driven. # 2. subroutine calls. It the last op is the call and for purposes of printing - # we don't need to print anything special there. However it encompases the + # we don't need to print anything special there. However it encompasses the # entire string of the node fn(...) if startnode.kind == "call": last_node = startnode[-1] self.set_pos_info(last_node, startnode_start, self.last_finish) return - @classmethod - def _get_mapping(cls, node): + def _get_mapping(self, node): if ( hasattr(node, "data") and len(node) > 0 @@ -1968,7 +2011,7 @@ def _get_mapping(cls, node): and not hasattr(node[-1], "parent") ): node[-1].parent = node - return MAP.get(node, MAP_DIRECT_FRAGMENT) + return self.MAP.get(node, self.MAP_DIRECT_FRAGMENT) pass @@ -1976,6 +2019,7 @@ def _get_mapping(cls, node): # DEFAULT_DEBUG_OPTS = {"asm": False, "tree": False, "grammar": False} + # This interface is deprecated def deparse_code( version, @@ -1986,7 +2030,7 @@ def deparse_code( showgrammar=False, code_objects={}, compile_mode="exec", - is_pypy=None, + is_pypy=IS_PYPY, walker=FragmentsWalker, ): debug_opts = {"asm": showasm, "ast": showast, "grammar": showgrammar} @@ -2011,6 +2055,8 @@ def code_deparse( code_objects={}, compile_mode="exec", walker=FragmentsWalker, + start_offset: int = 0, + stop_offset: int = -1, ): """ Convert the code object co into a python source fragment. 
@@ -2039,12 +2085,28 @@ def code_deparse( is_pypy = IS_PYPY # store final output stream for case of error - scanner = get_scanner(version, is_pypy=is_pypy) + scanner = get_scanner(version, is_pypy=is_pypy, show_asm=debug_opts["asm"]) show_asm = debug_opts.get("asm", None) tokens, customize = scanner.ingest(co, code_objects=code_objects, show_asm=show_asm) tokens, customize = scanner.ingest(co) + + if start_offset > 0: + for i, t in enumerate(tokens): + # If t.offset is a string, we want to skip this. + if isinstance(t.offset, int) and t.offset >= start_offset: + tokens = tokens[i:] + break + + if stop_offset > -1: + for i, t in enumerate(tokens): + # In contrast to the test for start_offset If t.offset is + # a string, we want to extract the integer offset value. + if t.off2int() >= stop_offset: + tokens = tokens[:i] + break + maybe_show_asm(show_asm, tokens) debug_parser = dict(PARSER_DEFAULT_DEBUG) @@ -2054,19 +2116,23 @@ def code_deparse( debug_parser["errorstack"] = True # Build Syntax Tree from tokenized and massaged disassembly. - # deparsed = pysource.FragmentsWalker(out, scanner, showast=showast) - show_ast = debug_opts.get("ast", None) + # deparsed = FragmentsWalker(out, scanner, showast=showast) + show_tree = debug_opts.get("tree", False) + linestarts = dict(scanner.opc.findlinestarts(co)) deparsed = walker( version, scanner, - showast=show_ast, + showast=show_tree, debug_parser=debug_parser, compile_mode=compile_mode, is_pypy=is_pypy, + linestarts=linestarts, ) is_top_level_module = co.co_name == "" - deparsed.ast = deparsed.build_ast(tokens, customize, co, is_top_level_module=is_top_level_module) + deparsed.ast = deparsed.build_ast( + tokens, customize, co, is_top_level_module=is_top_level_module + ) assert deparsed.ast == "stmts", "Should have parsed grammar start" @@ -2076,12 +2142,12 @@ def code_deparse( # convert leading '__doc__ = "..." 
into doc string assert deparsed.ast == "stmts" - (deparsed.mod_globs, nonlocals) = pysource.find_globals_and_nonlocals( + (deparsed.mod_globs, _) = find_globals_and_nonlocals( deparsed.ast, set(), set(), co, version ) # Just when you think we've forgotten about what we - # were supposed to to: Generate source from the Syntax ree! + # were supposed to do: Generate source from the Syntax tree! deparsed.gen_source(deparsed.ast, co.co_name, customize) deparsed.set_pos_info(deparsed.ast, 0, len(deparsed.text)) @@ -2111,9 +2177,6 @@ def code_deparse( return deparsed -from bisect import bisect_right - - def find_gt(a, x): "Find leftmost value greater than x" i = bisect_right(a, x) @@ -2127,8 +2190,8 @@ def code_deparse_around_offset( offset, co, out=StringIO(), - version=None, - is_pypy=None, + version: Optional[tuple] = None, + is_pypy: bool = False, debug_opts=DEFAULT_DEBUG_OPTS, ): """ @@ -2139,7 +2202,7 @@ def code_deparse_around_offset( assert iscode(co) if version is None: - version = sysinfo2float() + version = PYTHON_VERSION_TRIPLE if is_pypy is None: is_pypy = IS_PYPY @@ -2157,7 +2220,7 @@ def code_deparse_around_offset( return deparsed -# Deprecated. Here still for compatability +# Deprecated. Here still for compatibility def deparse_code_around_offset( name, offset, @@ -2166,7 +2229,7 @@ def deparse_code_around_offset( out=StringIO(), showasm=False, showast=False, - showgrammar=False, + showgrammar=PARSER_DEFAULT_DEBUG, is_pypy=False, ): debug_opts = {"asm": showasm, "ast": showast, "grammar": showgrammar} @@ -2192,8 +2255,7 @@ def deparsed_find(tup, deparsed, code): """Return a NodeInfo nametuple for a fragment-deparsed `deparsed` at `tup`. `tup` is a name and offset tuple, `deparsed` is a fragment object - and `code` is instruction bytecode. 
-""" + and `code` is instruction bytecode.""" nodeInfo = None name, last_i = tup if not hasattr(deparsed, "offsets"): diff --git a/uncompyle6/semantics/gencomp.py b/uncompyle6/semantics/gencomp.py index 6341d6da5..932556c91 100644 --- a/uncompyle6/semantics/gencomp.py +++ b/uncompyle6/semantics/gencomp.py @@ -19,13 +19,13 @@ from typing import Optional -from xdis import iscode +from xdis import co_flags_is_async, iscode from uncompyle6.parser import get_python_parser from uncompyle6.scanner import Code +from uncompyle6.scanners.tok import Token from uncompyle6.semantics.consts import PRECEDENCE from uncompyle6.semantics.helper import is_lambda_mode -from uncompyle6.scanners.tok import Token class ComprehensionMixin: @@ -38,11 +38,13 @@ class ComprehensionMixin: Python source code. In source code, the implicit function calls are not seen. """ + def closure_walk(self, node, collection_index): - """Dictionary and comprehensions using closure the way they are done in Python3. + """ + Dictionary and comprehensions using closure the way they are done in Python3. """ p = self.prec - self.prec = 27 + self.prec = PRECEDENCE["lambda_body"] - 1 code_index = 0 if node[0] == "load_genexpr" else 1 tree = self.get_comprehension_function(node, code_index=code_index) @@ -59,6 +61,10 @@ def closure_walk(self, node, collection_index): list_if = None assert n == "comp_iter" + # Pick out important parts of the comprehension: + # * the variables we iterate over: "stores" + # * the results we accumulate: "n" + # Find inner-most node. 
while n == "comp_iter": n = n[0] # recurse one step @@ -93,9 +99,12 @@ def closure_walk(self, node, collection_index): self.prec = p def comprehension_walk( - self, node, iter_index: Optional[int], code_index: int = -5, + self, + node, + iter_index: Optional[int], + code_index: int = -5, ): - p = self.prec + p: int = self.prec self.prec = PRECEDENCE["lambda_body"] - 1 # FIXME: clean this up @@ -107,7 +116,10 @@ def comprehension_walk( elif node[0] == "load_closure": cn = node[1] - elif self.version >= (3, 0) and node in ("generator_exp", "generator_exp_async"): + elif self.version >= (3, 0) and node in ( + "generator_exp", + "generator_exp_async", + ): if node[0] == "load_genexpr": load_genexpr = node[0] elif node[1] == "load_genexpr": @@ -128,7 +140,7 @@ def comprehension_walk( assert iscode(cn.attr) - code = Code(cn.attr, self.scanner, self.currentclass) + code = Code(cn.attr, self.scanner, self.currentclass, self.debug_opts["asm"]) # FIXME: is there a way we can avoid this? # The problem is that in filter in top-level list comprehensions we can @@ -136,7 +148,9 @@ def comprehension_walk( if is_lambda_mode(self.compile_mode): p_save = self.p self.p = get_python_parser( - self.version, compile_mode="exec", is_pypy=self.is_pypy, + self.version, + compile_mode="exec", + is_pypy=self.is_pypy, ) tree = self.build_ast(code._tokens, code._customize, code) self.p = p_save @@ -148,6 +162,27 @@ def comprehension_walk( while len(tree) == 1: tree = tree[0] + if tree == "stmts": + # FIXME: rest is a return None? 
+ # Verify this + # rest = tree[1:] + tree = tree[0] + elif tree == "lambda_start": + assert len(tree) <= 3 + tree = tree[-2] + if tree == "return_expr_lambda": + tree = tree[1] + pass + + if tree in ( + "genexpr_func", + "genexpr_func_async", + ): + for i in range(3, 5): + if tree[i] == "comp_iter": + iter_index = i + break + n = tree[iter_index] assert n == "comp_iter", n.kind @@ -178,9 +213,11 @@ def comprehension_walk( self.write(" in ") if node[2] == "expr": iter_expr = node[2] + elif node[3] in ("expr", "get_aiter"): + iter_expr = node[3] else: iter_expr = node[-3] - assert iter_expr == "expr" + assert iter_expr in ("expr", "get_aiter"), iter_expr self.preorder(iter_expr) self.preorder(tree[iter_index]) self.prec = p @@ -194,11 +231,16 @@ def comprehension_walk_newer( ): """Non-closure-based comprehensions the way they are done in Python3 and some Python 2.7. Note: there are also other set comprehensions. + + Note: there are also other comprehensions. """ # FIXME: DRY with listcomp_closure3 + p = self.prec self.prec = PRECEDENCE["lambda_body"] - 1 + comp_for = None + # FIXME? Nonterminals in grammar maybe should be split out better? # Maybe test on self.compile_mode? if ( @@ -232,53 +274,130 @@ def comprehension_walk_newer( is_30_dict_comp = False store = None + if node == "list_comp_async": - n = tree[2][1] + # We have two different kinds of grammar rules: + # list_comp_async ::= LOAD_LISTCOMP LOAD_STR MAKE_FUNCTION_0 expr ... + # and: + # list_comp_async ::= BUILD_LIST_0 LOAD_ARG list_afor2 + if tree[0] == "expr" and tree[0][0] == "list_comp_async": + tree = tree[0][0] + if tree[0] == "BUILD_LIST_0": + list_afor2 = tree[2] + assert list_afor2 == "list_afor2" + store = list_afor2[1] + assert store == "store" + n = list_afor2[3] if list_afor2[3] == "list_iter" else list_afor2[2] + else: + # ??? 
+ pass + elif node.kind in ("dict_comp_async", "set_comp_async"): + # We have two different kinds of grammar rules: + # dict_comp_async ::= LOAD_DICTCOMP LOAD_STR MAKE_FUNCTION_0 expr ... + # set_comp_async ::= LOAD_SETCOMP LOAD_STR MAKE_FUNCTION_0 expr ... + # and: + # dict_comp_async ::= BUILD_MAP_0 genexpr_func_async + # set_comp_async ::= BUILD_SET_0 genexpr_func_async + if tree[0] == "expr": + tree = tree[0] + + if tree[0].kind in ("BUILD_MAP_0", "BUILD_SET_0"): + genexpr_func_async = tree[1] + if genexpr_func_async == "genexpr_func_async": + store = genexpr_func_async[2] + assert store.kind.startswith("store") + n = genexpr_func_async[4] + assert n == "comp_iter" + comp_for = collection_node + else: + set_afor2 = genexpr_func_async + assert set_afor2 == "set_afor2" + n = set_afor2[1] + store = n[1] + comp_for = node[3] + else: + # ??? + pass + + elif node == "list_afor": + comp_for = node[0] + list_afor2 = node[1] + assert list_afor2 == "list_afor2" + store = list_afor2[1] + assert store == "store" + n = list_afor2[2] + elif node == "set_afor2": + comp_for = node[0] + set_iter_async = node[1] + assert set_iter_async == "set_iter_async" + + store = set_iter_async[1] + assert store == "store" + n = set_iter_async[2] + elif node == "list_comp" and tree[0] == "expr": + list_iter = None + for list_iter_try in tree: + if list_iter_try == "list_iter": + list_iter = list_iter_try + break + if not list_iter_try: + tree = tree[0][0] + n = tree[iter_index] + else: + n = list_iter + pass + pass + pass else: n = tree[iter_index] if tree in ( - "set_comp_func", "dict_comp_func", + "genexpr_func_async", + "generator_exp", "list_comp", + "set_comp", + "set_comp_func", "set_comp_func_header", ): for k in tree: - if k == "comp_iter": + if k.kind in ("comp_iter", "list_iter", "set_iter", "await_expr"): n = k elif k == "store": store = k pass pass pass - elif tree in ("dict_comp", "set_comp"): - assert self.version == (3, 0) - for k in tree: - if k in ("dict_comp_header", 
"set_comp_header"): - n = k - elif k == "store": - store = k - elif k == "dict_comp_iter": - is_30_dict_comp = True - n = (k[3], k[1]) + elif tree.kind in ("list_comp_async", "dict_comp_async", "set_afor2"): + if self.version == (3, 0): + for k in tree: + if k in ("dict_comp_header", "set_comp_header"): + n = k + elif k == "store": + store = k + elif k == "dict_comp_iter": + is_30_dict_comp = True + n = (k[3], k[1]) + pass + elif k == "comp_iter": + n = k[0] + pass pass - elif k == "comp_iter": - n = k[0] - pass - pass elif tree == "list_comp_async": store = tree[2][1] else: - assert n == "list_iter", n + if n.kind in ("RETURN_VALUE_LAMBDA", "return_expr_lambda"): + self.prune() + + assert n in ("list_iter", "comp_iter"), n # FIXME: I'm not totally sure this is right. # Find the list comprehension body. It is the inner-most # node that is not list_.. . if_node = None - comp_for = None comp_store = None - if n == "comp_iter": + if n == "comp_iter" and store is None: comp_for = n comp_store = tree[3] @@ -289,7 +408,7 @@ def comprehension_walk_newer( while n in ("list_iter", "list_afor", "list_afor2", "comp_iter"): # iterate one nesting deeper - if self.version == 3.0 and len(n) == 3: + if self.version == (3, 0) and len(n) == 3: assert n[0] == "expr" and n[1] == "expr" n = n[1] elif n == "list_afor": @@ -302,14 +421,31 @@ def comprehension_walk_newer( n = n[0] if n in ("list_for", "comp_for"): - if n[2] == "store" and not store: - store = n[2] + if n == "list_for" and not comp_for and n[0] == "expr": + comp_for = n[0] + + n_index = 3 + if ( + (n[2] == "store") + or (self.version == (3, 0) and n[4] == "store") + and not store + ): + if self.version == (3, 0): + store = n[4] + n_index = 5 + else: + store = n[2] if not comp_store: comp_store = store - n = n[3] - elif n in ("list_if", "list_if_not", - "list_if37", "list_if37_not", - "comp_if", "comp_if_not"): + n = n[n_index] + elif n in ( + "list_if", + "list_if_not", + "list_if37", + "list_if37_not", + "comp_if", + 
"comp_if_not", + ): have_not = n in ("list_if_not", "comp_if_not", "list_if37_not") if n in ("list_if37", "list_if37_not"): n = n[1] @@ -335,7 +471,7 @@ def comprehension_walk_newer( assert store, "Couldn't find store in list/set comprehension" # A problem created with later Python code generation is that there - # is a lamda set up with a dummy argument name that is then called + # is a lambda set up with a dummy argument name that is then called # So we can't just translate that as is but need to replace the # dummy name. Below we are picking out the variable name as seen # in the code. And trying to generate code for the other parts @@ -348,7 +484,16 @@ def comprehension_walk_newer( self.write(": ") self.preorder(n[1]) else: - self.preorder(n[0]) + if self.version == (3, 0): + if isinstance(n, Token): + body = store + elif len(n) > 1: + body = n[1] + else: + body = n[0] + else: + body = n[0] + self.preorder(body) if node == "list_comp_async": self.write(" async") @@ -360,15 +505,29 @@ def comprehension_walk_newer( if comp_store: self.preorder(comp_store) + comp_store = None else: self.preorder(store) self.write(" in ") - self.preorder(node[in_node_index]) + if comp_for: + self.preorder(comp_for) + else: + try: + node[in_node_index] + except: + from trepan.api import debug + + debug() + self.preorder(node[in_node_index]) # Here is where we handle nested list iterations. 
if tree == "list_comp" and self.version != (3, 0): - list_iter = tree[1] + list_iter = None + for list_iter_try in tree: + if list_iter_try == "list_iter": + list_iter = list_iter_try + break assert list_iter == "list_iter" if list_iter[0] == "list_for": self.preorder(list_iter[0][3]) @@ -383,7 +542,7 @@ def comprehension_walk_newer( if have_not: self.write("not ") pass - self.prec = 27 + self.prec = PRECEDENCE["lambda_body"] - 1 self.preorder(if_node) pass self.prec = p @@ -394,7 +553,7 @@ def get_comprehension_function(self, node, code_index: int): find the comprehension node buried in the tree which may be surrounded with start-like symbols or dominiators,. """ - self.prec = 27 + self.prec = PRECEDENCE["lambda_body"] - 1 code_node = node[code_index] if code_node == "load_genexpr": code_node = code_node[0] @@ -410,7 +569,9 @@ def get_comprehension_function(self, node, code_index: int): if self.compile_mode in ("listcomp",): # add other comprehensions to this list p_save = self.p self.p = get_python_parser( - self.version, compile_mode="exec", is_pypy=self.is_pypy, + self.version, + compile_mode="exec", + is_pypy=self.is_pypy, ) tree = self.build_ast( code._tokens, code._customize, code, is_lambda=self.is_lambda @@ -429,9 +590,153 @@ def get_comprehension_function(self, node, code_index: int): if tree[0] in ("dom_start", "dom_start_opt"): tree = tree[1] - while len(tree) == 1 or ( - tree in ("stmt", "sstmt", "return", "return_expr", "return_expr_lambda") - ): + while len(tree) == 1 or (tree in ("stmt", "sstmt", "return", "return_expr")): self.prec = 100 tree = tree[1] if tree[0] in ("dom_start", "dom_start_opt") else tree[0] return tree + + def listcomp_closure3(self, node): + """ + List comprehensions in Python 3 when handled as a closure. + See if we can combine code. 
+ """ + + # FIXME: DRY with comprehension_walk_newer + p = self.prec + self.prec = 27 + + code_obj = node[1].attr + assert iscode(code_obj), node[1] + code = Code(code_obj, self.scanner, self.currentclass, self.debug_opts["asm"]) + + tree = self.build_ast(code._tokens, code._customize, code) + self.customize(code._customize) + + # skip over: sstmt, stmt, return, return_expr + # and other singleton derivations + while len(tree) == 1 or ( + tree in ("sstmt", "return") and tree[-1] in ("RETURN_LAST", "RETURN_VALUE") + ): + self.prec = 100 + tree = tree[0] + + n = tree[1] + + # Pick out important parts of the comprehension: + # * the variables we iterate over: "stores" + # * the results we accumulate: "n" + + # collections is the name of the expression(s) we are iterating over + collections = [node[-3]] + list_ifs = [] + + if self.version[:2] == (3, 0) and n.kind != "list_iter": + # FIXME 3.0 is a snowflake here. We need + # special code for this. Not sure if this is totally + # correct. + stores = [tree[3]] + assert tree[4] == "comp_iter" + n = tree[4] + # Find the list comprehension body. It is the inner-most + # node that is not comp_.. . + while n == "comp_iter": + if n[0] == "comp_for": + n = n[0] + stores.append(n[2]) + n = n[3] + elif n[0] in ("comp_if", "comp_if_not"): + n = n[0] + # FIXME: just a guess + if n[0].kind == "expr": + list_ifs.append(n) + else: + list_ifs.append([1]) + n = n[2] + pass + else: + break + pass + + # Skip over n[0] which is something like: _[1] + self.preorder(n[1]) + + else: + assert n == "list_iter" + stores = [] + # Find the list comprehension body. It is the inner-most + # node that is not list_.. . 
+ while n == "list_iter": + # recurse one step + n = n[0] + + # FIXME: adjust for set comprehension + if n == "list_for": + stores.append(n[2]) + if self.version[:2] == (3, 0): + body_index = 5 + else: + body_index = 3 + + n = n[body_index] + if n[0] == "list_for": + # Dog-paddle down largely singleton reductions + # to find the collection (expr) + c = n[0][0] + if c == "expr": + c = c[0] + # FIXME: grammar is wonky here? Is this really an attribute? + if c == "attribute": + c = c[0] + collections.append(c) + pass + elif n in ("list_if", "list_if_not", "list_if_or_not"): + if n[0].kind == "expr": + list_ifs.append(n) + else: + list_ifs.append([1]) + if self.version[:2] == (3, 0) and n[2] == "list_iter": + n = n[2] + else: + n = n[-2] if n[-1] == "come_from_opt" else n[-1] + pass + elif n == "list_if37": + list_ifs.append(n) + n = n[-1] + pass + elif n == "list_afor": + collections.append(n[0][0]) + n = n[1] + stores.append(n[1][0]) + n = n[2] if n[2].kind == "list_iter" else n[3] + pass + + assert n == "lc_body", tree + + if self.version[:2] == (3, 0): + body_index = 1 + else: + body_index = 0 + self.preorder(n[body_index]) + + # FIXME: add indentation around "for"'s and "in"'s + n_colls = len(collections) + for i, store in enumerate(stores): + if i >= n_colls: + break + token = collections[i] + if not isinstance(token, Token): + token = token.first_child() + if token == "LOAD_DEREF" and co_flags_is_async(code_obj.co_flags): + self.write(" async") + pass + self.write(" for ") + if self.version[:2] == (3, 0): + store = token + self.preorder(store) + self.write(" in ") + self.preorder(collections[i]) + if i < len(list_ifs): + self.preorder(list_ifs[i]) + pass + pass + self.prec = p diff --git a/uncompyle6/semantics/helper.py b/uncompyle6/semantics/helper.py index afaf7b73e..78998bec9 100644 --- a/uncompyle6/semantics/helper.py +++ b/uncompyle6/semantics/helper.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 Rocky Bernstein +# Copyright (c) 2022-2023 Rocky Bernstein # # This 
program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -43,7 +43,7 @@ def escape_string(s, quotes=('"', "'", '"""', "'''")): s = s.replace(orig, replace) return "%s%s%s" % (quote, s, quote) -# FIXME: this and find_globals could be paramaterized with one of the +# FIXME: this and find_globals could be parameterized with one of the # above global ops def find_all_globals(node, globs): """Search Syntax Tree node to find variable names that are global.""" @@ -152,6 +152,9 @@ def is_lambda_mode(compile_mode: str) -> bool: def print_docstring(self, indent, docstring): + if isinstance(docstring, bytes): + docstring = docstring.decode("utf8", errors="backslashreplace") + quote = '"""' if docstring.find(quote) >= 0: if docstring.find("'''") == -1: diff --git a/uncompyle6/semantics/linemap.py b/uncompyle6/semantics/linemap.py index 23eafdae3..e548fa53c 100644 --- a/uncompyle6/semantics/linemap.py +++ b/uncompyle6/semantics/linemap.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 by Rocky Bernstein +# Copyright (c) 2018, 2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -12,96 +12,100 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
+ +from uncompyle6.semantics.fragments import ( + FragmentsWalker, + code_deparse as fragments_code_deparse, +) from uncompyle6.semantics.pysource import SourceWalker, code_deparse -import uncompyle6.semantics.fragments as fragments + # FIXME: does this handle nested code, and lambda properly class LineMapWalker(SourceWalker): def __init__(self, *args, **kwargs): - super(LineMapWalker, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.source_linemap = {} self.current_line_number = 1 def write(self, *data): - """Augment write routine to keep track of current line""" - for l in data: - ## print("XXX write: '%s'" % l) - for i in str(l): - if i == '\n': + """Augment write routine to keep track of current line.""" + for line in data: + # print(f"XXX write: '{line}'") + for i in str(line): + if i == "\n": self.current_line_number += 1 pass pass pass - return super(LineMapWalker, self).write(*data) + return super().write(*data) # Note n_expr needs treatment too def default(self, node): - """Augment write default routine to record line number changes""" - if hasattr(node, 'linestart'): + """Augment default-write routine to record line number changes.""" + if hasattr(node, "linestart"): if node.linestart: self.source_linemap[self.current_line_number] = node.linestart - return super(LineMapWalker, self).default(node) + return super().default(node) def n_LOAD_CONST(self, node): - if hasattr(node, 'linestart'): + if hasattr(node, "linestart"): if node.linestart: self.source_linemap[self.current_line_number] = node.linestart - return super(LineMapWalker, self).n_LOAD_CONST(node) + return super().n_LOAD_CONST(node) -class LineMapFragmentWalker(fragments.FragmentsWalker, LineMapWalker): +class LineMapFragmentWalker(LineMapWalker, FragmentsWalker): def __init__(self, *args, **kwargs): - super(LineMapFragmentWalker, self).__init__(*args, **kwargs) - self.source_linemap = {} - self.current_line_number = 0 + super().__init__(*args, **kwargs) + def 
deparse_code_with_map(*args, **kwargs): """ Like deparse_code but saves line number correspondences. Deprecated. Use code_deparse_with_map """ - kwargs['walker'] = LineMapWalker + kwargs["walker"] = LineMapWalker return code_deparse(*args, **kwargs) + def code_deparse_with_map(*args, **kwargs): """ Like code_deparse but saves line number correspondences. """ - kwargs['walker'] = LineMapWalker + kwargs["walker"] = LineMapWalker return code_deparse(*args, **kwargs) -def deparse_code_with_fragments_and_map(*args, **kwargs): - """ - Like deparse_code_with_map but saves fragments. - Deprecated. Use code_deparse_with_fragments_and_map - """ - kwargs['walker'] = LineMapFragmentWalker - return fragments.deparse_code(*args, **kwargs) def code_deparse_with_fragments_and_map(*args, **kwargs): """ Like code_deparse_with_map but saves fragments. """ - kwargs['walker'] = LineMapFragmentWalker - return fragments.code_deparse(*args, **kwargs) + kwargs["walker"] = LineMapFragmentWalker + return fragments_code_deparse(*args, **kwargs) + + +if __name__ == "__main__": -if __name__ == '__main__': def deparse_test(co): - "This is a docstring" + """This is a docstring""" deparsed = code_deparse_with_map(co) - a = 1; b = 2 + a = 1 + b = 2 print("\n") - linemap = [(line_no, deparsed.source_linemap[line_no]) - for line_no in - sorted(deparsed.source_linemap.keys())] + linemap = [ + (line_no, deparsed.source_linemap[line_no]) + for line_no in sorted(deparsed.source_linemap.keys()) + ] print(linemap) deparsed = code_deparse_with_fragments_and_map(co) print("\n") - linemap2 = [(line_no, deparsed.source_linemap[line_no]) - for line_no in - sorted(deparsed.source_linemap.keys())] + linemap2 = [ + (line_no, deparsed.source_linemap[line_no]) + for line_no in sorted(deparsed.source_linemap.keys()) + ] print(linemap2) # assert linemap == linemap2 return - deparse_test(deparse_test.__code__) + + deparse_test(deparse_test.func_code) diff --git a/uncompyle6/semantics/make_function1.py 
b/uncompyle6/semantics/make_function1.py new file mode 100644 index 000000000..09f95b4f0 --- /dev/null +++ b/uncompyle6/semantics/make_function1.py @@ -0,0 +1,191 @@ +# Copyright (c) 2015-2023 by Rocky Bernstein +# Copyright (c) 2000-2002 by hartmut Goebel +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +""" +All the crazy things we have to do to handle Python functions in Python before 3.0. +The saga of changes continues in 3.0 and above and in other files. +""" +from typing import List, Tuple +from uncompyle6.scanner import Code +from uncompyle6.semantics.parser_error import ParserError +from uncompyle6.parser import ParserError as ParserError2 +from uncompyle6.semantics.helper import ( + print_docstring, + find_all_globals, + find_globals_and_nonlocals, + find_none, +) +from xdis import iscode + +def make_function1(self, node, is_lambda, nested=1, code_node=None): + """ + Dump function definition, doc string, and function body. + This code is specialied for Python 2. + """ + + def build_param(tree, param_names: List[str]) -> Tuple[bool, List[str]]: + """build parameters: + - handle defaults + - handle format tuple parameters + """ + # if formal parameter is a tuple, the parameter name + # starts with a dot (eg. 
'.1', '.2') + args = tree[0] + del tree[0] + params = [] + assert args.kind in ("star_args", "args", "varargs") + has_star_arg = args.kind in ("star_args", "varargs") + args_store = args[2] + if args_store == "args_store": + for arg in args_store: + params.append(param_names[arg.attr]) + return has_star_arg, params + + # MAKE_FUNCTION_... or MAKE_CLOSURE_... + assert node[-1].kind.startswith("BUILD_") + + defparams = [] + # args_node = node[-1] + # if isinstance(args_node.attr, tuple): + # # positional args are after kwargs + # defparams = node[1 : args_node.attr[0] + 1] + # pos_args, kw_args, annotate_argc = args_node.attr + # else: + # defparams = node[: args_node.attr] + # kw_args = 0 + # pass + + lambda_index = None + + if lambda_index and is_lambda and iscode(node[lambda_index].attr): + assert node[lambda_index].kind == "LOAD_LAMBDA" + code = node[lambda_index].attr + else: + code = code_node.attr + + assert iscode(code) + code = Code(code, self.scanner, self.currentclass) + + # add defaults values to parameter names + argc = code.co_argcount + paramnames = list(code.co_varnames[:argc]) + + # defaults are for last n parameters, thus reverse + paramnames.reverse() + defparams.reverse() + + try: + tree = self.build_ast( + code._tokens, + code._customize, + code, + is_lambda=is_lambda, + noneInNames=("None" in code.co_names), + ) + except (ParserError, ParserError2) as p: + self.write(str(p)) + if not self.tolerate_errors: + self.ERROR = p + return + + indent = self.indent + + # build parameters + has_star_arg, params = build_param(tree, code.co_names) + + if has_star_arg: + params[-1] = "*" + params[-1] + + # dump parameter list (with default values) + if is_lambda: + self.write("lambda ", ", ".join(params)) + # If the last statement is None (which is the + # same thing as "return None" in a lambda) and the + # next to last statement is a "yield". 
Then we want to + # drop the (return) None since that was just put there + # to have something to after the yield finishes. + # FIXME: this is a bit hoaky and not general + if ( + len(tree) > 1 + and self.traverse(tree[-1]) == "None" + and self.traverse(tree[-2]).strip().startswith("yield") + ): + del tree[-1] + # Now pick out the expr part of the last statement + tree_expr = tree[-1] + while tree_expr.kind != "expr": + tree_expr = tree_expr[0] + tree[-1] = tree_expr + pass + else: + self.write("(", ", ".join(params)) + + # if kw_args > 0: + # if not (4 & code.co_flags): + # if argc > 0: + # self.write(", *, ") + # else: + # self.write("*, ") + # pass + # else: + # self.write(", ") + + # for n in node: + # if n == "pos_arg": + # continue + # else: + # self.preorder(n) + # break + # pass + + # if code_has_star_star_arg(code): + # if argc > 0: + # self.write(", ") + # self.write("**%s" % code.co_varnames[argc + kw_pairs]) + + if is_lambda: + self.write(": ") + else: + self.println("):") + + if ( + len(code.co_consts) > 0 and code.co_consts[0] is not None and not is_lambda + ): # ugly + # docstring exists, dump it + print_docstring(self, indent, code.co_consts[0]) + + if not is_lambda: + assert tree == "stmts" + + all_globals = find_all_globals(tree, set()) + + globals, nonlocals = find_globals_and_nonlocals( + tree, set(), set(), code, self.version + ) + + # Python 1 doesn't support the "nonlocal" statement + + for g in sorted((all_globals & self.mod_globs) | globals): + self.println(self.indent, "global ", g) + self.mod_globs -= all_globals + has_none = "None" in code.co_names + rn = has_none and not find_none(tree) + tree.code = code + self.gen_source( + tree, code.co_name, code._customize, is_lambda=is_lambda, returnNone=rn + ) + + code._tokens = None # save memory + code._customize = None # save memory diff --git a/uncompyle6/semantics/make_function2.py b/uncompyle6/semantics/make_function2.py index be93c7962..91a0252ac 100644 --- 
a/uncompyle6/semantics/make_function2.py +++ b/uncompyle6/semantics/make_function2.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2021 by Rocky Bernstein +# Copyright (c) 2015-2021 2024 by Rocky Bernstein # Copyright (c) 2000-2002 by hartmut Goebel # # This program is free software: you can redistribute it and/or modify @@ -17,33 +17,34 @@ All the crazy things we have to do to handle Python functions in Python before 3.0. The saga of changes continues in 3.0 and above and in other files. """ -from uncompyle6.scanner import Code -from uncompyle6.semantics.parser_error import ParserError + +from itertools import zip_longest + +from xdis import code_has_star_arg, code_has_star_star_arg, iscode + from uncompyle6.parser import ParserError as ParserError2 +from uncompyle6.scanner import Code from uncompyle6.semantics.helper import ( - print_docstring, find_all_globals, find_globals_and_nonlocals, find_none, + print_docstring, ) -from xdis import iscode, code_has_star_arg, code_has_star_star_arg - -from itertools import zip_longest +from uncompyle6.semantics.parser_error import ParserError -from uncompyle6.show import maybe_show_tree_param_default def make_function2(self, node, is_lambda, nested=1, code_node=None): """ - Dump function defintion, doc string, and function body. + Dump function definition, doc string, and function body. This code is specialied for Python 2. """ def build_param(ast, name, default): """build parameters: - - handle defaults - - handle format tuple parameters + - handle defaults + - handle format tuple parameters """ - # if formal parameter is a tuple, the paramater name + # if formal parameter is a tuple, the parameter name # starts with a dot (eg. 
'.1', '.2') if name.startswith("."): # replace the name with the tuple-string @@ -52,7 +53,6 @@ def build_param(ast, name, default): if default: value = self.traverse(default, indent="") - maybe_show_tree_param_default(self.showast, name, value) result = "%s=%s" % (name, value) if result[-2:] == "= ": # default was 'LOAD_CONST None' result += "None" @@ -199,5 +199,5 @@ def build_param(ast, name, default): ast, code.co_name, code._customize, is_lambda=is_lambda, returnNone=rn ) - code._tokens = None # save memory + code._tokens = None # save memory code._customize = None # save memory diff --git a/uncompyle6/semantics/make_function3.py b/uncompyle6/semantics/make_function3.py index ad4d80e48..a7377f61f 100644 --- a/uncompyle6/semantics/make_function3.py +++ b/uncompyle6/semantics/make_function3.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2021 by Rocky Bernstein +# Copyright (c) 2015-2021, 2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -16,18 +16,18 @@ All the crazy things we have to do to handle Python functions in 3.0-3.5 or so. The saga of changes before and after is in other files. """ -from xdis import iscode, code_has_star_arg, code_has_star_star_arg, CO_GENERATOR -from uncompyle6.scanner import Code -from uncompyle6.parsers.treenode import SyntaxTree -from uncompyle6.semantics.parser_error import ParserError +from xdis import CO_GENERATOR, code_has_star_arg, code_has_star_star_arg, iscode + from uncompyle6.parser import ParserError as ParserError2 +from uncompyle6.parsers.treenode import SyntaxTree +from uncompyle6.scanner import Code from uncompyle6.semantics.helper import ( - print_docstring, find_all_globals, find_globals_and_nonlocals, find_none, + print_docstring, ) - +from uncompyle6.semantics.parser_error import ParserError from uncompyle6.show import maybe_show_tree_param_default # FIXME: DRY the below code... 
@@ -37,13 +37,13 @@ def make_function3_annotate( self, node, is_lambda, nested=1, code_node=None, annotate_last=-1 ): """ - Dump function defintion, doc string, and function + Dump function definition, doc string, and function body. This code is specialized for Python 3""" def build_param(ast, name, default): """build parameters: - - handle defaults - - handle format tuple parameters + - handle defaults + - handle format tuple parameters """ if default: value = self.traverse(default, indent="") @@ -300,7 +300,7 @@ def build_param(ast, name, default): def make_function3(self, node, is_lambda, nested=1, code_node=None): """Dump function definition, doc string, and function body in - Python version 3.0 and above + Python version 3.0 and above """ # For Python 3.3, the evaluation stack in MAKE_FUNCTION is: @@ -310,7 +310,7 @@ def make_function3(self, node, is_lambda, nested=1, code_node=None): # the object on the stack, for keyword-only parameters # * parameter annotation objects # * a tuple listing the parameter names for the annotations - # (only if there are ony annotation objects) + # (only if there are only annotation objects) # * the code associated with the function (at TOS1) # * the qualified name of the function (at TOS) @@ -333,11 +333,10 @@ def make_function3(self, node, is_lambda, nested=1, code_node=None): def build_param(ast, name, default, annotation=None): """build parameters: - - handle defaults - - handle format tuple parameters + - handle defaults + - handle format tuple parameters """ value = self.traverse(default, indent="") - maybe_show_tree_param_default(self.showast, name, value) if annotation: result = "%s: %s=%s" % (name, annotation, value) else: @@ -419,7 +418,6 @@ def build_param(ast, name, default, annotation=None): pass if len(node) > 2 and (have_kwargs or node[lc_index].kind != "load_closure"): - # Find the index in "node" where the first default # parameter value is located. 
Note this is in contrast to # key-word arguments, pairs of (name, value), which appear after "*". @@ -492,8 +490,6 @@ def build_param(ast, name, default, annotation=None): self.ERROR = p return - kw_pairs = 0 - i = len(paramnames) - len(defparams) # build parameters diff --git a/uncompyle6/semantics/make_function36.py b/uncompyle6/semantics/make_function36.py index 1509019d7..a976e996d 100644 --- a/uncompyle6/semantics/make_function36.py +++ b/uncompyle6/semantics/make_function36.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021 by Rocky Bernstein +# Copyright (c) 2019-2022 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,34 +17,33 @@ The saga of changes before 3.6 is in other files. """ from xdis import ( - iscode, + CO_ASYNC_GENERATOR, + CO_GENERATOR, code_has_star_arg, code_has_star_star_arg, - CO_GENERATOR, - CO_ASYNC_GENERATOR, + iscode, ) -from uncompyle6.scanner import Code -from uncompyle6.semantics.parser_error import ParserError + from uncompyle6.parser import ParserError as ParserError2 +from uncompyle6.scanner import Code from uncompyle6.semantics.helper import ( find_all_globals, find_globals_and_nonlocals, find_none, ) -from itertools import zip_longest - -from uncompyle6.show import maybe_show_tree_param_default +from uncompyle6.semantics.parser_error import ParserError def make_function36(self, node, is_lambda, nested=1, code_node=None): """Dump function definition, doc string, and function body in Python version 3.6 and above. """ - # MAKE_CLOSURE adds an additional closure slot + + # MAKE_CLOSURE adds a closure slot # In Python 3.6 and above stack change again. I understand # 3.7 changes some of those changes, although I don't - # see it in this code yet. Yes, it is hard to follow + # see it in this code yet. Yes, it is hard to follow, # and I am sure I haven't been able to keep up. # Thank you, Python. 
@@ -55,7 +54,6 @@ def build_param(ast, name, default, annotation=None): - handle format tuple parameters """ value = default - maybe_show_tree_param_default(self.showast, name, value) if annotation: result = "%s: %s=%s" % (name, annotation, value) else: @@ -86,16 +84,15 @@ def build_param(ast, name, default, annotation=None): args_attr = args_node.attr if len(args_attr) == 3: - pos_args, kw_args, annotate_argc = args_attr + _, kw_args, annotate_argc = args_attr else: - pos_args, kw_args, annotate_argc, closure = args_attr + _, kw_args, annotate_argc, closure = args_attr if node[-2] != "docstring": i = -4 else: i = -5 - kw_pairs = 0 if annotate_argc: # Turn into subroutine and DRY with other use annotate_node = node[i] @@ -107,9 +104,9 @@ def build_param(ast, name, default, annotation=None): ): types = [self.traverse(n, indent="") for n in annotate_node[:-2]] names = annotate_node[-2].attr - l = len(types) - assert l == len(names) - for i in range(l): + length = len(types) + assert length == len(names) + for i in range(length): annotate_dict[names[i]] = types[i] pass pass @@ -120,11 +117,6 @@ def build_param(ast, name, default, annotation=None): # annotate = node[i] i -= 1 - if kw_args: - kw_node = node[pos_args] - if kw_node == "expr": - kw_node = kw_node[0] - defparams = [] # FIXME: DRY with code below default, kw_args, annotate_argc = args_node.attr[0:3] @@ -162,7 +154,7 @@ def build_param(ast, name, default, annotation=None): defparams.reverse() try: - ast = self.build_ast( + tree = self.build_ast( scanner_code._tokens, scanner_code._customize, scanner_code, @@ -183,7 +175,7 @@ def build_param(ast, name, default, annotation=None): for i, defparam in enumerate(defparams): params.append( build_param( - ast, paramnames[i], defparam, annotate_dict.get(paramnames[i]) + tree, paramnames[i], defparam, annotate_dict.get(paramnames[i]) ) ) @@ -212,7 +204,13 @@ def build_param(ast, name, default, annotation=None): # dump parameter list (with default values) if is_lambda: 
- self.write("lambda ", ", ".join(params)) + self.write("lambda") + if len(params): + self.write(" ", ", ".join(params)) + elif kwonlyargcount > 0 and not (4 & code.co_flags): + assert argc == 0 + self.write(" ") + # If the last statement is None (which is the # same thing as "return None" in a lambda) and the # next to last statement is a "yield". Then we want to @@ -220,16 +218,16 @@ def build_param(ast, name, default, annotation=None): # to have something to after the yield finishes. # FIXME: this is a bit hoaky and not general if ( - len(ast) > 1 - and self.traverse(ast[-1]) == "None" - and self.traverse(ast[-2]).strip().startswith("yield") + len(tree) > 1 + and self.traverse(tree[-1]) == "None" + and self.traverse(tree[-2]).strip().startswith("yield") ): - del ast[-1] + del tree[-1] # Now pick out the expr part of the last statement - ast_expr = ast[-1] - while ast_expr.kind != "expr": - ast_expr = ast_expr[0] - ast[-1] = ast_expr + tree_expr = tree[-1] + while tree_expr.kind != "expr": + tree_expr = tree_expr[0] + tree[-1] = tree_expr pass else: self.write("(", ", ".join(params)) @@ -237,19 +235,19 @@ def build_param(ast, name, default, annotation=None): ends_in_comma = False if kwonlyargcount > 0: - if not (4 & code.co_flags): + if not 4 & code.co_flags: if argc > 0: self.write(", *, ") else: self.write("*, ") pass - ends_in_comma = True else: if argc > 0: self.write(", ") - ends_in_comma = True - ann_dict = kw_dict = default_tup = None + # ann_dict = kw_dict = default_tup = None + kw_dict = None + fn_bits = node[-1].attr # Skip over: # MAKE_FUNCTION, @@ -299,7 +297,7 @@ def build_param(ast, name, default, annotation=None): pass pass # handle others - other_kw = [c == None for c in kw_args] + other_kw = [c is None for c in kw_args] for i, flag in enumerate(other_kw): if flag: @@ -337,11 +335,11 @@ def build_param(ast, name, default, annotation=None): # docstring exists, dump it self.println(self.traverse(node[-2])) - assert ast == "stmts" + assert tree in 
("stmts", "lambda_start") - all_globals = find_all_globals(ast, set()) + all_globals = find_all_globals(tree, set()) globals, nonlocals = find_globals_and_nonlocals( - ast, set(), set(), code, self.version + tree, set(), set(), code, self.version ) for g in sorted((all_globals & self.mod_globs) | globals): @@ -352,9 +350,9 @@ def build_param(ast, name, default, annotation=None): self.mod_globs -= all_globals has_none = "None" in code.co_names - rn = has_none and not find_none(ast) + rn = has_none and not find_none(tree) self.gen_source( - ast, + tree, code.co_name, scanner_code._customize, is_lambda=is_lambda, diff --git a/uncompyle6/semantics/n_actions.py b/uncompyle6/semantics/n_actions.py index 679266a46..9234cea0f 100644 --- a/uncompyle6/semantics/n_actions.py +++ b/uncompyle6/semantics/n_actions.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 by Rocky Bernstein +# Copyright (c) 2022-2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -16,21 +16,19 @@ Custom Nonterminal action functions. See NonterminalActions docstring. """ +from uncompyle6.parsers.treenode import SyntaxTree +from uncompyle6.scanners.tok import Token from uncompyle6.semantics.consts import ( INDENT_PER_LEVEL, + NO_PARENTHESIS_EVER, NONE, + PARENTHESIS_ALWAYS, PRECEDENCE, minint, ) +from uncompyle6.semantics.helper import find_code_node, flatten_list, print_docstring +from uncompyle6.util import better_repr, get_code_name -from uncompyle6.parsers.treenode import SyntaxTree -from uncompyle6.scanners.tok import Token -from uncompyle6.util import better_repr - -from uncompyle6.semantics.helper import ( - find_code_node, - flatten_list, -) class NonterminalActions: """ @@ -43,7 +41,14 @@ class NonterminalActions: node is the subtree of the parse tree the that nonterminal name as the root. 
""" - def n_alias(self, node): + def __init__(self): + # Precedence is used to determine when an expression needs + # parenthesis surrounding it. A high value indicates no + # parenthesis are needed. + self.prec = 1000 + self.in_format_string = False + + def n_alias(self, node: SyntaxTree): if self.version <= (2, 1): if len(node) == 2: store = node[1] @@ -65,9 +70,10 @@ def n_alias(self, node): else: self.write(iname, " as ", sname) self.prune() # stop recursing + n_alias37 = n_alias - def n_assign(self, node): + def n_assign(self, node: SyntaxTree): # A horrible hack for Python 3.0 .. 3.2 if (3, 0) <= self.version <= (3, 2) and len(node) == 2: if ( @@ -78,19 +84,19 @@ def n_assign(self, node): self.prune() self.default(node) - def n_assign2(self, node): + def n_assign2(self, node: SyntaxTree): for n in node[-2:]: if n[0] == "unpack": n[0].kind = "unpack_w_parens" self.default(node) - def n_assign3(self, node): + def n_assign3(self, node: SyntaxTree): for n in node[-3:]: if n[0] == "unpack": n[0].kind = "unpack_w_parens" self.default(node) - def n_attribute(self, node): + def n_attribute(self, node: SyntaxTree): if node[0] == "LOAD_CONST" or node[0] == "expr" and node[0][0] == "LOAD_CONST": # FIXME: I didn't record which constants parenthesis is # necessary. 
However, I suspect that we could further @@ -100,7 +106,7 @@ def n_attribute(self, node): node.kind = "attribute_w_parens" self.default(node) - def n_bin_op(self, node): + def n_bin_op(self, node: SyntaxTree): """bin_op (formerly "binary_expr") is the Python AST BinOp""" self.preorder(node[0]) self.write(" ") @@ -112,9 +118,9 @@ def n_bin_op(self, node): self.prec += 1 self.prune() - def n_build_slice2(self, node): + def n_build_slice2(self, node: SyntaxTree): p = self.prec - self.prec = 100 + self.prec = NO_PARENTHESIS_EVER if not node[0].isNone(): self.preorder(node[0]) self.write(":") @@ -123,10 +129,9 @@ def n_build_slice2(self, node): self.prec = p self.prune() # stop recursing - - def n_build_slice3(self, node): + def n_build_slice3(self, node: SyntaxTree): p = self.prec - self.prec = 100 + self.prec = NO_PARENTHESIS_EVER if not node[0].isNone(): self.preorder(node[0]) self.write(":") @@ -138,8 +143,7 @@ def n_build_slice3(self, node): self.prec = p self.prune() # stop recursing - def n_classdef(self, node): - + def n_classdef(self, node: SyntaxTree): if self.version >= (3, 6): self.n_classdef36(node) elif self.version >= (3, 0): @@ -152,7 +156,7 @@ def n_classdef(self, node): # * class_name - the name of the class # * subclass_info - the parameters to the class e.g. # class Foo(bar, baz) - # ----------- + # ------------ # * subclass_code - the code for the subclass body if node == "classdefdeco2": @@ -174,7 +178,7 @@ def n_classdef(self, node): subclass_code = build_class[-3][1].attr class_name = node[0][0].pattr else: - raise "Internal Error n_classdef: cannot find class name" + raise RuntimeError("Internal Error n_classdef: cannot find class name") if node == "classdefdeco2": self.write("\n") @@ -202,7 +206,7 @@ def n_classdef(self, node): n_classdefdeco2 = n_classdef - def n_const_list(self, node): + def n_const_list(self, node: SyntaxTree): """ prettyprint a constant dict, list, set or tuple. 
""" @@ -221,40 +225,82 @@ def n_const_list(self, node): else: # from trepan.api import debug; debug() raise TypeError( - f"Internal Error: n_const_list expects dict, list set, or set; got {lastnodetype}" + ( + "Internal Error: n_const_list expects dict, list set, or set; got " + f"{lastnodetype}" + ) ) self.indent_more(INDENT_PER_LEVEL) sep = "" + line_len = len(self.indent) if is_dict: keys = flat_elems[-1].attr assert isinstance(keys, tuple) assert len(keys) == len(flat_elems) - 1 for i, elem in enumerate(flat_elems[:-1]): assert elem.kind == "ADD_VALUE" - value = elem.pattr + if elem.optype in ("local", "name"): + value = elem.attr + elif elem.optype == "const" and not isinstance(elem.attr, str): + value = elem.attr + else: + try: + value = "%r" % elem.pattr + except Exception: + value = elem.pattr if elem.linestart is not None: if elem.linestart != self.line_number: - sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1] + next_indent = self.indent + INDENT_PER_LEVEL[:-1] + line_len = len(next_indent) + sep += "\n" + next_indent self.line_number = elem.linestart else: if sep != "": - sep += " " - self.write(f"{sep} {repr(keys[i])}: {value}") - sep = "," + sep += ", " + elif line_len > 80: + next_indent = self.indent + INDENT_PER_LEVEL[:-1] + line_len = len(next_indent) + sep += "\n" + next_indent + + sep_key_value = f"{sep}{repr(keys[i])}: {value}" + line_len += len(sep_key_value) + self.write(sep_key_value) + sep = ", " else: for elem in flat_elems: - assert elem.kind == "ADD_VALUE" - value = elem.pattr + if elem == "add_value": + elem = elem[0] + + if elem == "ADD_VALUE": + if elem.optype in ("local", "name"): + value = elem.attr + elif elem.optype == "const": + value = elem.pattr + else: + value = "%s" % repr(elem.attr) + else: + assert elem.kind == "ADD_VALUE_VAR" + value = "%s" % elem.pattr + if elem.linestart is not None: if elem.linestart != self.line_number: - sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1] + next_indent = self.indent + 
INDENT_PER_LEVEL[:-1] + line_len += len(next_indent) + sep += "\n" + next_indent self.line_number = elem.linestart else: if sep != "": sep += " " + line_len += len(sep) + elif line_len > 80: + next_indent = self.indent + INDENT_PER_LEVEL[:-1] + line_len = len(next_indent) + sep += "\n" + next_indent + + line_len += len(sep) + len(str(value)) + 1 self.write(sep, value) - sep = "," + sep = ", " self.write(endchar) self.indent_less(INDENT_PER_LEVEL) @@ -262,7 +308,7 @@ def n_const_list(self, node): self.prune() return - def n_delete_subscript(self, node): + def n_delete_subscript(self, node: SyntaxTree): if node[-2][0] == "build_list" and node[-2][0][-1].kind.startswith( "BUILD_TUPLE" ): @@ -272,7 +318,7 @@ def n_delete_subscript(self, node): n_store_subscript = n_subscript = n_delete_subscript - def n_dict(self, node): + def n_dict(self, node: SyntaxTree): """ Prettyprint a dict. 'dict' is something like k = {'a': 1, 'b': 42}" @@ -284,7 +330,7 @@ def n_dict(self, node): return p = self.prec - self.prec = 100 + self.prec = PRECEDENCE["dict"] self.indent_more(INDENT_PER_LEVEL) sep = INDENT_PER_LEVEL[:-1] @@ -296,8 +342,8 @@ def n_dict(self, node): if node[0].kind.startswith("kvlist"): # Python 3.5+ style key/value list in dict kv_node = node[0] - l = list(kv_node) - length = len(l) + ll = list(kv_node) + length = len(ll) if kv_node[-1].kind.startswith("BUILD_MAP"): length -= 1 i = 0 @@ -305,7 +351,7 @@ def n_dict(self, node): # Respect line breaks from source while i < length: self.write(sep) - name = self.traverse(l[i], indent="") + name = self.traverse(ll[i], indent="") if i > 0: line_number = self.indent_if_source_nl( line_number, self.indent + INDENT_PER_LEVEL[:-1] @@ -313,7 +359,7 @@ def n_dict(self, node): line_number = self.line_number self.write(name, ": ") value = self.traverse( - l[i + 1], indent=self.indent + (len(name) + 2) * " " + ll[i + 1], indent=self.indent + (len(name) + 2) * " " ) self.write(value) sep = ", " @@ -326,15 +372,15 @@ def n_dict(self, 
node): elif len(node) > 1 and node[1].kind.startswith("kvlist"): # Python 3.0..3.4 style key/value list in dict kv_node = node[1] - l = list(kv_node) - if len(l) > 0 and l[0].kind == "kv3": + ll = list(kv_node) + if len(ll) > 0 and ll[0].kind == "kv3": # Python 3.2 does this kv_node = node[1][0] - l = list(kv_node) + ll = list(kv_node) i = 0 - while i < len(l): + while i < len(ll): self.write(sep) - name = self.traverse(l[i + 1], indent="") + name = self.traverse(ll[i + 1], indent="") if i > 0: line_number = self.indent_if_source_nl( line_number, self.indent + INDENT_PER_LEVEL[:-1] @@ -343,7 +389,7 @@ def n_dict(self, node): line_number = self.line_number self.write(name, ": ") value = self.traverse( - l[i], indent=self.indent + (len(name) + 2) * " " + ll[i], indent=self.indent + (len(name) + 2) * " " ) self.write(value) sep = ", " @@ -482,7 +528,6 @@ def n_dict(self, node): self.prune() def n_docstring(self, node): - indent = self.indent doc_node = node[0] if doc_node.attr: @@ -496,73 +541,10 @@ def n_docstring(self, node): else: docstring = node[0].pattr - quote = '"""' - if docstring.find(quote) >= 0: - if docstring.find("'''") == -1: - quote = "'''" - - self.write(indent) - docstring = repr(docstring.expandtabs())[1:-1] - - for (orig, replace) in ( - ("\\\\", "\t"), - ("\\r\\n", "\n"), - ("\\n", "\n"), - ("\\r", "\n"), - ('\\"', '"'), - ("\\'", "'"), - ): - docstring = docstring.replace(orig, replace) - - # Do a raw string if there are backslashes but no other escaped characters: - # also check some edge cases - if ( - "\t" in docstring - and "\\" not in docstring - and len(docstring) >= 2 - and docstring[-1] != "\t" - and (docstring[-1] != '"' or docstring[-2] == "\t") - ): - self.write("r") # raw string - # Restore backslashes unescaped since raw - docstring = docstring.replace("\t", "\\") - else: - # Escape the last character if it is the same as the - # triple quote character. 
- quote1 = quote[-1] - if len(docstring) and docstring[-1] == quote1: - docstring = docstring[:-1] + "\\" + quote1 - - # Escape triple quote when needed - if quote == '"""': - replace_str = '\\"""' - else: - assert quote == "'''" - replace_str = "\\'''" - - docstring = docstring.replace(quote, replace_str) - docstring = docstring.replace("\t", "\\\\") - - lines = docstring.split("\n") - - self.write(quote) - if len(lines) == 0: - self.println(quote) - elif len(lines) == 1: - self.println(lines[0], quote) - else: - self.println(lines[0]) - for line in lines[1:-1]: - if line: - self.println(line) - else: - self.println("\n\n") - pass - pass - self.println(lines[-1], quote) + print_docstring(self, indent, docstring) self.prune() - def n_elifelsestmtr(self, node): + def n_elifelsestmtr(self, node: SyntaxTree): if node[2] == "COME_FROM": return_stmts_node = node[3] node.kind = "elifelsestmtr2" @@ -593,7 +575,7 @@ def n_elifelsestmtr(self, node): self.indent_less() self.prune() - def n_except_cond2(self, node): + def n_except_cond2(self, node: SyntaxTree): if node[-1] == "come_from_opt": unpack_node = -3 else: @@ -607,7 +589,7 @@ def n_except_cond2(self, node): # FIXME: figure out how to get this into customization # put so that we can get access via super from # the fragments routine. 
- def n_exec_stmt(self, node): + def n_exec_stmt(self, node: SyntaxTree): """ exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT exec_stmt ::= expr exprlist EXEC_STMT @@ -639,7 +621,9 @@ def n_expr(self, node): # hasattr(self, 'current_line_number')): # self.source_linemap[self.current_line_number] = n.linestart - self.prec = PRECEDENCE.get(n.kind, -2) + if n.kind != "expr": + self.prec = PRECEDENCE.get(n.kind, PARENTHESIS_ALWAYS) + if n == "LOAD_CONST" and repr(n.pattr)[0] == "-": self.prec = 6 @@ -662,19 +646,32 @@ def n_generator_exp(self, node): self.write("(") iter_index = 3 if self.version > (3, 2): - code_index = -6 - if self.version > (3, 6): - # Python 3.7+ adds optional "come_froms" at node[0] - if node[0].kind in ("load_closure", "load_genexpr") and self.version >= (3, 8): + if self.version >= (3, 3): + if node[0].kind in ( + "load_closure", + "load_genexpr", + ) and self.version >= (3, 8): + code_index = -6 is_lambda = self.is_lambda if node[0].kind == "load_genexpr": self.is_lambda = False self.closure_walk(node, collection_index=4) self.is_lambda = is_lambda else: - code_index = -6 - iter_index = 4 if self.version < (3, 8) else 3 - self.comprehension_walk(node, iter_index=iter_index, code_index=code_index) + # Python 3.7+ adds optional "come_froms" at node[0] so count from + # the end. 
+ if node == "generator_exp_async" and self.version[:2] == (3, 6): + code_index = 0 + else: + code_index = -6 + iter_index = ( + 4 + if self.version < (3, 8) and not isinstance(node[4], Token) + else 3 + ) + self.comprehension_walk( + node, iter_index=iter_index, code_index=code_index + ) pass pass else: @@ -683,7 +680,7 @@ def n_generator_exp(self, node): self.write(")") self.prune() - n_generator_exp_async = n_generator_exp + n_genexpr_func = n_generator_exp_async = n_generator_exp def n_ifelsestmtr(self, node): if node[2] == "COME_FROM": @@ -745,7 +742,7 @@ def n_ifelsestmtr(self, node): def n_import_from(self, node): relative_path_index = 0 if self.version >= (2, 5): - if node[relative_path_index].attr > 0: + if node[relative_path_index].pattr > 0: node[2].pattr = ("." * node[relative_path_index].attr) + node[2].pattr if self.version > (2, 7): if isinstance(node[1].pattr, tuple): @@ -763,7 +760,7 @@ def n_lambda_body(self, node): self.make_function(node, is_lambda=True, code_node=node[-2]) self.prune() # stop recursing - def n_list(self, node): + def n_list(self, node: SyntaxTree): """ prettyprint a dict, list, set or tuple. """ @@ -774,7 +771,8 @@ def n_list(self, node): p = self.prec self.prec = PRECEDENCE["yield"] - 1 - lastnode = node.pop() + lastnode = node[-1] + node = node[:-1] lastnodetype = lastnode.kind # If this build list is inside a CALL_FUNCTION_VAR, @@ -793,13 +791,16 @@ def n_list(self, node): if lastnodetype.startswith("BUILD_LIST"): self.write("[") endchar = "]" + elif lastnodetype.startswith("BUILD_MAP_UNPACK"): self.write("{*") endchar = "}" + elif lastnodetype.startswith("BUILD_SET"): self.write("{") endchar = "}" - elif lastnodetype.startswith("BUILD_TUPLE"): + + elif lastnodetype.startswith("BUILD_TUPLE") or node == "tuple": # Tuples can appear places that can NOT # have parenthesis around them, like array # subscripts. 
We check for that by seeing @@ -820,6 +821,7 @@ def n_list(self, node): elif lastnodetype.startswith("ROT_TWO"): self.write("(") endchar = ")" + else: raise TypeError( "Internal Error: n_build_list expects list, tuple, set, or unpack" @@ -858,69 +860,21 @@ def n_list(self, node): self.prune() return - n_set = n_tuple = n_build_set = n_list + n_set = n_build_set = n_tuple = n_list def n_list_comp(self, node): - """List comprehensions""" - p = self.prec - self.prec = 100 - if self.version >= (2, 7): - if self.is_pypy: - self.n_list_comp_pypy27(node) - return - n = node[-1] - elif node[-1] == "delete": - if node[-2] == "JUMP_BACK": - n = node[-3] - else: - n = node[-2] - - assert n == "list_iter" - - # Find the list comprehension body. It is the inner-most - # node that is not list_.. . - # FIXME: DRY with other use - while n == "list_iter": - n = n[0] # iterate one nesting deeper - if n == "list_for": - n = n[3] - elif n == "list_if": - n = n[2] - elif n == "list_if_not": - n = n[2] - assert n == "lc_body" - self.write("[ ") - - if self.version >= (2, 7): - expr = n[0] - list_iter = node[-1] + self.write("[") + if node[0].kind == "load_closure": + assert self.version >= (3, 0) + self.listcomp_closure3(node) else: - expr = n[1] - if node[-2] == "JUMP_BACK": - list_iter = node[-3] + if node == "listcomp_async": + list_iter_index = 5 else: - list_iter = node[-2] - - assert expr == "expr" - assert list_iter == "list_iter" - - # FIXME: use source line numbers for directing line breaks - - line_number = self.line_number - last_line = self.f.getvalue().split("\n")[-1] - l = len(last_line) - indent = " " * (l - 1) - - self.preorder(expr) - line_number = self.indent_if_source_nl(line_number, indent) - self.preorder(list_iter) - l2 = self.indent_if_source_nl(line_number, indent) - if l2 != line_number: - self.write(" " * (len(indent) - len(self.indent) - 1) + "]") - else: - self.write(" ]") - self.prec = p - self.prune() # stop recursing + list_iter_index = 1 + 
self.comprehension_walk_newer(node, list_iter_index, 0) + self.write("]") + self.prune() def n_list_comp_pypy27(self, node): """List comprehensions in PYPY.""" @@ -971,25 +925,10 @@ def n_list_comp_pypy27(self, node): self.prec = p self.prune() # stop recursing - def n_listcomp(self, node): - self.write("[") - if node[0].kind == "load_closure": - assert self.version >= (3, 0) - self.listcomp_closure3(node) - else: - if node == "listcomp_async": - list_iter_index = 5 - else: - list_iter_index = 1 - self.comprehension_walk_newer(node, list_iter_index, 0) - self.write("]") - self.prune() - def n_mkfunc(self, node): - code_node = find_code_node(node, -2) code = code_node.attr - self.write(code.co_name) + self.write(get_code_name(code)) self.indent_more() self.make_function(node, is_lambda=False, code_node=code_node) @@ -1024,7 +963,7 @@ def n_return_expr(self, node): else: self.n_expr(node) - # Python 3.x can have be dead code as a result of its optimization? + # Python 3.x can have dead code as a result of its optimization? 
# So we'll add a # at the end of the return lambda so the rest is ignored def n_return_expr_lambda(self, node): if 1 <= len(node) <= 2: @@ -1034,7 +973,10 @@ def n_return_expr_lambda(self, node): else: # We can't comment out like above because there may be a trailing ')' # that needs to be written - assert len(node) == 3 and node[2] in ("RETURN_VALUE_LAMBDA", "LAMBDA_MARKER") + assert len(node) == 3 and node[2] in ( + "RETURN_VALUE_LAMBDA", + "LAMBDA_MARKER", + ) self.preorder(node[0]) self.prune() @@ -1054,7 +996,16 @@ def n_return_if_stmt(self, node): def n_set_comp(self, node): self.write("{") if node[0] in ["LOAD_SETCOMP", "LOAD_DICTCOMP"]: - self.comprehension_walk_newer(node, 1, 0) + if self.version == (3, 0): + if len(node) >= 6: + iter_index = 6 + else: + assert node[1].kind.startswith("MAKE_FUNCTION") + iter_index = 2 + pass + else: + iter_index = 1 + self.comprehension_walk_newer(node, iter_index=iter_index, code_index=0) elif node[0].kind == "load_closure" and self.version >= (3, 0): self.closure_walk(node, collection_index=4) else: @@ -1160,7 +1111,7 @@ def n_LOAD_CONST(self, node): self.write("...") elif attr is None: # LOAD_CONST 'None' only occurs, when None is - # implicit eg. in 'return' w/o params + # implicit e.g. in 'return' w/o params # pass self.write("None") elif isinstance(data, tuple): diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 52a7014ef..5fb77771b 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2022 by Rocky Bernstein +# Copyright (c) 2015-2024 by Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock @@ -91,7 +91,7 @@ # the second item is the nonterminal name and the precedence is given last. # # %C evaluate/travers children recursively, with sibling children separated by the -# given string. 
It needs a 3-tuple: a starting node, the maximimum +# given string. It needs a 3-tuple: a starting node, the maximum # value of an end node, and a string to be inserted between sibling children # # %, Append ',' if last %C only printed one item. This is mostly for tuples @@ -99,12 +99,12 @@ # other tuples. The specifier takes no arguments # # %P same as %C but sets operator precedence. Its argument is a 4-tuple: -# the node low and high indices, the separator, a string the precidence +# the node low and high indices, the separator, a string the precedence # value, an integer. # # %D Same as `%C` this is for left-recursive lists like kwargs where goes # to epsilon at the beginning. It needs a 3-tuple: a starting node, the -# maximimum value of an end node, and a string to be inserted between +# maximum value of an end node, and a string to be inserted between # sibling children. If we were to use `%C` an extra separator with an # epsilon would appear at the beginning. # @@ -119,7 +119,7 @@ # %[N]{EXPR} Python eval(EXPR) in context of node[N]. Takes no arguments # # %[N]{%X} evaluate/recurse on child node[N], using specifier %X. -# %X can be one of the above, e.g. %c, %p, etc. Takes the arguemnts +# %X can be one of the above, e.g. %c, %p, etc. Takes the arguments # that the specifier uses. # # %% literal '%'. Takes no arguments. @@ -130,63 +130,55 @@ # evaluating the escape code. 
import sys +from io import StringIO +from typing import Optional -IS_PYPY = "__pypy__" in sys.builtin_module_names - -from xdis import iscode, COMPILER_FLAG_BIT +from spark_parser import GenericASTTraversal +from xdis import COMPILER_FLAG_BIT, IS_PYPY, iscode from xdis.version_info import PYTHON_VERSION_TRIPLE -from uncompyle6.parser import get_python_parser +from uncompyle6.parser import get_python_parser, parse from uncompyle6.parsers.treenode import SyntaxTree -from spark_parser import GenericASTTraversal from uncompyle6.scanner import Code, get_scanner -import uncompyle6.parser as python_parser -from uncompyle6.semantics.check_ast import checker - -from uncompyle6.semantics.make_function2 import make_function2 -from uncompyle6.semantics.make_function3 import make_function3 -from uncompyle6.semantics.make_function36 import make_function36 -from uncompyle6.semantics.parser_error import ParserError -from uncompyle6.semantics.customize import customize_for_version -from uncompyle6.semantics.gencomp import ComprehensionMixin -from uncompyle6.semantics.helper import ( - print_docstring, - find_code_node, - find_globals_and_nonlocals, - flatten_list, -) - from uncompyle6.scanners.tok import Token - -from uncompyle6.semantics.n_actions import NonterminalActions -from uncompyle6.semantics.transform import is_docstring, TreeTransform +from uncompyle6.semantics.check_ast import checker from uncompyle6.semantics.consts import ( - ASSIGN_DOC_STRING, ASSIGN_TUPLE_PARAM, INDENT_PER_LEVEL, LINE_LENGTH, - MAP, - MAP_DIRECT, NAME_MODULE, + NO_PARENTHESIS_EVER, NONE, PASS, PRECEDENCE, RETURN_LOCALS, RETURN_NONE, TAB, + TABLE_DIRECT, TABLE_R, escape, - minint, ) - - +from uncompyle6.semantics.customize import customize_for_version +from uncompyle6.semantics.gencomp import ComprehensionMixin +from uncompyle6.semantics.helper import ( + find_globals_and_nonlocals, + is_lambda_mode, + print_docstring, +) +from uncompyle6.semantics.make_function1 import make_function1 +from 
uncompyle6.semantics.make_function2 import make_function2 +from uncompyle6.semantics.make_function3 import make_function3 +from uncompyle6.semantics.make_function36 import make_function36 +from uncompyle6.semantics.n_actions import NonterminalActions +from uncompyle6.semantics.parser_error import ParserError +from uncompyle6.semantics.transform import TreeTransform, is_docstring from uncompyle6.show import maybe_show_tree from uncompyle6.util import better_repr -DEFAULT_DEBUG_OPTS = {"asm": False, "tree": False, "grammar": False} -def unicode(x): return x -from io import StringIO +def unicode(x): + return x + PARSER_DEFAULT_DEBUG = { "rules": False, @@ -215,11 +207,17 @@ def __str__(self): class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): + """ + Class to traverse a Parse Tree of the bytecode instruction built from parsing to + produce some sort of source text. + The Parse tree may be turned an Abstract Syntax tree as an intermediate step. + """ + stacked_params = ("f", "indent", "is_lambda", "_globals") def __init__( self, - version, + version: tuple, out, scanner, showast=TREE_DEFAULT_DEBUG, @@ -229,26 +227,29 @@ def __init__( linestarts={}, tolerate_errors=False, ): - """`version' is the Python version (a float) of the Python dialect + """`version' is the Python version of the Python dialect of both the syntax tree and language we should produce. `out' is IO-like file pointer to where the output should go. It - whould have a getvalue() method. + would have a getvalue() method. `scanner' is a method to call when we need to scan tokens. Sometimes in producing output we will run across further tokens that need - to be scaned. + to be scanned. If `showast' is True, we print the syntax tree. - `compile_mode' is is either 'exec' or 'single'. It is the compile - mode that was used to create the Syntax Tree and specifies a - gramar variant within a Python version to use. + `compile_mode` is is either `exec`, `single` or `lambda`. 
+ + For `lambda`, the grammar that can be used in lambda + expressions is used. Otherwise, it is the compile mode that + was used to create the Syntax Tree and specifies a grammar + variant within a Python version to use. `is_pypy` should be True if the Syntax Tree was generated for PyPy. `linestarts` is a dictionary of line number to bytecode offset. This - can sometimes assist in determinte which kind of source-code construct + can sometimes assist in determining which kind of source-code construct to use when there is ambiguity. """ @@ -264,28 +265,35 @@ def __init__( is_pypy=is_pypy, ) - # Initialize p_lambda on demand - self.p_lambda = None - - self.treeTransform = TreeTransform(version=self.version, show_ast=showast) - self.debug_parser = dict(debug_parser) - self.showast = showast - self.params = params - self.param_stack = [] self.ERROR = None - self.prec = 100 - self.return_none = False - self.mod_globs = set() - self.currentclass = None + self.ast_errors = [] self.classes = [] - self.pending_newlines = 0 - self.linestarts = linestarts + self.compile_mode = compile_mode + self.currentclass = None + self.debug_parser = dict(debug_parser) + self.is_pypy = is_pypy + self.linemap = {} self.line_number = 1 - self.ast_errors = [] + self.linestarts = linestarts + self.mod_globs = set() + self.name = None + self.offset2inst_index = scanner.offset2inst_index + self.param_stack = [] + self.params = params + self.pending_newlines = 0 + self.prec = NO_PARENTHESIS_EVER + self.return_none = False + self.showast = showast + self.version = version + + self.treeTransform = TreeTransform(version=self.version, show_ast=showast) + # FIXME: have p.insts update in a better way # modularity is broken here self.insts = scanner.insts - self.offset2inst_index = scanner.offset2inst_index + + # Initialize p_lambda on demand + self.p_lambda = None # This is in Python 2.6 on. It changes the way # strings get interpreted. 
See n_LOAD_CONST @@ -303,23 +311,34 @@ def __init__( self.in_format_string = None # hide_internal suppresses displaying the additional instructions that sometimes - # exist in code but but were not written in the source code. + # exist in code but were not written in the source code. # An example is: # __module__ = __name__ self.hide_internal = True - self.compile_mode = compile_mode - self.name = None - self.version = version - self.is_pypy = is_pypy + + self.TABLE_DIRECT = TABLE_DIRECT.copy() + self.TABLE_R = TABLE_R.copy() + self.MAP_DIRECT = (self.TABLE_DIRECT,) + self.MAP_R = (self.TABLE_R, -1) + + self.MAP = { + "stmt": self.MAP_R, + "call": self.MAP_R, + "delete": self.MAP_R, + "store": self.MAP_R, + } + customize_for_version(self, is_pypy, version) + return - def maybe_show_tree(self, ast, phase): + def maybe_show_tree(self, tree, phase): if self.showast.get("before", False): self.println( """ ---- end before transform """ + + " " ) if self.showast.get("after", False): self.println( @@ -329,9 +348,9 @@ def maybe_show_tree(self, ast, phase): + " " ) if self.showast.get(phase, False): - maybe_show_tree(self, ast) + maybe_show_tree(self, tree) - def str_with_template(self, ast) -> str: + def str_with_template(self, ast): stream = sys.stdout stream.write(self.str_with_template1(ast, "", None)) stream.write("\n") @@ -370,7 +389,6 @@ def str_with_template1(self, ast, indent, sibNum=None) -> str: indent += " " i = 0 for node in ast: - if hasattr(node, "__repr1__"): if enumerate_children: child = self.str_with_template1(node, indent, i) @@ -390,9 +408,9 @@ def str_with_template1(self, ast, indent, sibNum=None) -> str: i += 1 return rv - def indent_if_source_nl(self, line_number, indent): + def indent_if_source_nl(self, line_number: int, indent_spaces: str): if line_number != self.line_number: - self.write("\n" + self.indent + INDENT_PER_LEVEL[:-1]) + self.write("\n" + indent_spaces + INDENT_PER_LEVEL[:-1]) return self.line_number f = property( @@ -514,19 +532,19 @@ 
def is_return_none(self, node): def pp_tuple(self, tup): """Pretty print a tuple""" last_line = self.f.getvalue().split("\n")[-1] - l = len(last_line) + 1 - indent = " " * l + ll = len(last_line) + 1 + indent = " " * ll self.write("(") sep = "" for item in tup: self.write(sep) - l += len(sep) + ll += len(sep) s = better_repr(item, self.version) - l += len(s) + ll += len(s) self.write(s) sep = "," - if l > LINE_LENGTH: - l = 0 + if ll > LINE_LENGTH: + ll = 0 sep += "\n" + indent else: sep += " " @@ -539,9 +557,11 @@ def pp_tuple(self, tup): # Python changes make function this much that we need at least 3 different routines, # and probably more in the future. def make_function(self, node, is_lambda, nested=1, code_node=None, annotate=None): - if self.version <= (2, 7): + if self.version <= (1, 2): + make_function1(self, node, is_lambda, nested, code_node) + elif self.version <= (2, 7): make_function2(self, node, is_lambda, nested, code_node) - elif (3, 0) <= self.version <= (3, 5): + elif (3, 0) <= self.version < (3, 6): make_function3(self, node, is_lambda, nested, code_node) elif self.version >= (3, 6): make_function36(self, node, is_lambda, nested, code_node) @@ -568,6 +588,7 @@ def print_super_classes(self, node): def print_super_classes3(self, node): n = len(node) - 1 + j = 0 if node.kind != "expr": if node == "kwarg": self.template_engine(("(%[0]{attr}=%c)", 1), node) @@ -605,9 +626,9 @@ def print_super_classes3(self, node): self.write("(") if kwargs: # Last arg is tuple of keyword values: omit - l = n - 1 + m = n - 1 else: - l = n + m = n if kwargs: # 3.6+ does this @@ -619,7 +640,7 @@ def print_super_classes3(self, node): j += 1 j = 0 - while i < l: + while i < m: self.write(sep) value = self.traverse(node[i]) self.write("%s=%s" % (kwargs[j], value)) @@ -627,7 +648,7 @@ def print_super_classes3(self, node): j += 1 i += 1 else: - while i < l: + while i < m: value = self.traverse(node[i]) i += 1 self.write(sep, value) @@ -697,15 +718,16 @@ def kv_map(self, 
kv_node, sep, line_number, indent): pass def template_engine(self, entry, startnode): - """The format template interpetation engine. See the comment at the - beginning of this module for the how we interpret format + """The format template interpretation engine. See the comment at the + beginning of this module for how we interpret format specifications such as %c, %C, and so on. """ # print("-----") - # print(startnode) + # print(startnode.kind) # print(entry[0]) # print('======') + fmt = entry[0] arg = 1 i = 0 @@ -742,20 +764,31 @@ def template_engine(self, entry, startnode): if isinstance(index[1], str): # if node[index[0]] != index[1]: # from trepan.api import debug; debug() - assert node[index[0]] == index[1], ( - "at %s[%d], expected '%s' node; got '%s'" - % (node.kind, arg, index[1], node[index[0]].kind,) + assert ( + node[index[0]] == index[1] + ), "at %s[%d], expected '%s' node; got '%s'" % ( + node.kind, + arg, + index[1], + node[index[0]].kind, ) else: - assert node[index[0]] in index[1], ( - "at %s[%d], expected to be in '%s' node; got '%s'" - % (node.kind, arg, index[1], node[index[0]].kind,) + assert ( + node[index[0]] in index[1] + ), "at %s[%d], expected to be in '%s' node; got '%s'" % ( + node.kind, + arg, + index[1], + node[index[0]].kind, ) index = index[0] - assert isinstance(index, int), ( - "at %s[%d], %s should be int or tuple" - % (node.kind, arg, type(index),) + assert isinstance( + index, int + ), "at %s[%d], %s should be int or tuple" % ( + node.kind, + arg, + type(index), ) try: @@ -778,14 +811,18 @@ def template_engine(self, entry, startnode): if len(tup) == 3: (index, nonterm_name, self.prec) = tup if isinstance(tup[1], str): - assert node[index] == nonterm_name, ( - "at %s[%d], expected '%s' node; got '%s'" - % (node.kind, arg, nonterm_name, node[index].kind,) + assert ( + node[index] == nonterm_name + ), "at %s[%d], expected '%s' node; got '%s'" % ( + node.kind, + arg, + nonterm_name, + node[index].kind, ) else: assert node[tup[0]] 
in tup[1], ( - "at %s[%d], expected to be in '%s' node; got '%s'" - % (node.kind, arg, index[1], node[index[0]].kind,) + f"at {node.kind}[{tup[0]}], expected to be in '{tup[1]}' " + f"node; got '{node[tup[0]].kind}'" ) else: @@ -854,7 +891,7 @@ def template_engine(self, entry, startnode): d = node.__dict__ try: self.write(eval(expr, d, d)) - except: + except Exception: raise m = escape.search(fmt, i) self.write(fmt[i:]) @@ -878,17 +915,17 @@ def customize(self, customize): of arguments -- we add a new entry for each in TABLE_R. """ for k, v in list(customize.items()): - if k in TABLE_R: + if k in self.TABLE_R: continue op = k[: k.rfind("_")] if k.startswith("CALL_METHOD"): # This happens in PyPy and Python 3.7+ - TABLE_R[k] = ("%c(%P)", (0, "expr"), (1, -1, ", ", 100)) + self.TABLE_R[k] = ("%c(%P)", (0, "expr"), (1, -1, ", ", 100)) elif self.version >= (3, 6) and k.startswith("CALL_FUNCTION_KW"): - TABLE_R[k] = ("%c(%P)", (0, "expr"), (1, -1, ", ", 100)) + self.TABLE_R[k] = ("%c(%P)", (0, "expr"), (1, -1, ", ", 100)) elif op == "CALL_FUNCTION": - TABLE_R[k] = ( + self.TABLE_R[k] = ( "%c(%P)", (0, "expr"), (1, -1, ", ", PRECEDENCE["yield"] - 1), @@ -898,63 +935,62 @@ def customize(self, customize): "CALL_FUNCTION_VAR_KW", "CALL_FUNCTION_KW", ): - # FIXME: handle everything in customize. # Right now, some of this is here, and some in that. if v == 0: - str = "%c(%C" # '%C' is a dummy here ... - p2 = (0, 0, None) # .. because of the None in this + template_str = "%c(%C" # '%C' is a dummy here ... 
+ p2 = (0, 0, None) # because of the None in this else: - str = "%c(%C, " + template_str = "%c(%C, " p2 = (1, -2, ", ") if op == "CALL_FUNCTION_VAR": # Python 3.5 only puts optional args (the VAR part) - # lowest down the stack + # the lowest down the stack if self.version == (3, 5): - if str == "%c(%C, ": + if template_str == "%c(%C, ": entry = ("%c(*%C, %c)", 0, p2, -2) - elif str == "%c(%C": + elif template_str == "%c(%C": entry = ("%c(*%C)", 0, (1, 100, "")) elif self.version == (3, 4): # CALL_FUNCTION_VAR's top element of the stack contains # the variable argument list if v == 0: - str = "%c(*%c)" - entry = (str, 0, -2) + template_str = "%c(*%c)" + entry = (template_str, 0, -2) else: - str = "%c(%C, *%c)" - entry = (str, 0, p2, -2) + template_str = "%c(%C, *%c)" + entry = (template_str, 0, p2, -2) else: - str += "*%c)" - entry = (str, 0, p2, -2) + template_str += "*%c)" + entry = (template_str, 0, p2, -2) elif op == "CALL_FUNCTION_KW": - str += "**%c)" - entry = (str, 0, p2, -2) + template_str += "**%c)" + entry = (template_str, 0, p2, -2) elif op == "CALL_FUNCTION_VAR_KW": - str += "*%c, **%c)" + template_str += "*%c, **%c)" # Python 3.5 only puts optional args (the VAR part) - # lowest down the stack + # the lowest down the stack na = v & 0xFF # positional parameters if self.version == (3, 5) and na == 0: if p2[2]: p2 = (2, -2, ", ") - entry = (str, 0, p2, 1, -2) + entry = (template_str, 0, p2, 1, -2) else: if p2[2]: p2 = (1, -3, ", ") - entry = (str, 0, p2, -3, -2) + entry = (template_str, 0, p2, -3, -2) pass else: assert False, "Unhandled CALL_FUNCTION %s" % op - TABLE_R[k] = entry + self.TABLE_R[k] = entry pass # handled by n_dict: - # if op == 'BUILD_SLICE': TABLE_R[k] = ('%C' , (0,-1,':')) + # if op == 'BUILD_SLICE': self.TABLE_R[k] = ('%C' , (0,-1,':')) # handled by n_list: - # if op == 'BUILD_LIST': TABLE_R[k] = ('[%C]' , (0,-1,', ')) - # elif op == 'BUILD_TUPLE': TABLE_R[k] = ('(%C%,)', (0,-1,', ')) + # if op == 'BUILD_LIST': self.TABLE_R[k] = 
('[%C]' , (0,-1,', ')) + # elif op == 'BUILD_TUPLE': self.TABLE_R[k] = ('(%C%,)', (0,-1,', ')) pass return @@ -987,14 +1023,14 @@ def get_tuple_parameter(self, ast, name): # within the function definition assert node[1] == "store" # if lhs is not a UNPACK_TUPLE (or equiv.), - # add parenteses to make this a tuple + # add parentheses to make this a tuple # if node[1][0] not in ('unpack', 'unpack_list'): result = self.traverse(node[1]) if not (result.startswith("(") and result.endswith(")")): result = "(%s)" % result return result # return self.traverse(node[1]) - raise Exception("Can't find tuple parameter " + name) + return f"({name}" def build_class(self, code): """Dump class definition, doc string and class body.""" @@ -1027,7 +1063,7 @@ def build_class(self, code): if ast[0] == "sstmt": ast[0] = ast[0][0] first_stmt = ast[0] - except: + except Exception: pass try: @@ -1036,7 +1072,7 @@ def build_class(self, code): del ast[0] first_stmt = ast[0] pass - except: + except Exception: pass have_qualname = False @@ -1048,17 +1084,15 @@ def build_class(self, code): if self.version < (3, 0): # Should we ditch this in favor of the "else" case? qualname = ".".join(self.classes) - QUAL_NAME = SyntaxTree( + qual_name_tree = SyntaxTree( "assign", [ SyntaxTree("expr", [Token("LOAD_CONST", pattr=qualname)]), - SyntaxTree( - "store", [Token("STORE_NAME", pattr="__qualname__")] - ), + SyntaxTree("store", [Token("STORE_NAME", pattr="__qualname__")]), ], ) # FIXME: is this right now that we've redone the grammar? - have_qualname = ast[0] == QUAL_NAME + have_qualname = ast[0] == qual_name_tree else: # Python 3.4+ has constants like 'cmp_to_key..K' # which are not simple classes like the < 3 case. 
@@ -1070,7 +1104,7 @@ def build_class(self, code): and first_stmt[1][0] == Token("STORE_NAME", pattr="__qualname__") ): have_qualname = True - except: + except Exception: pass if have_qualname: @@ -1081,8 +1115,8 @@ def build_class(self, code): # if docstring exists, dump it if code.co_consts and code.co_consts[0] is not None and len(ast) > 0: do_doc = False + i = 0 if is_docstring(ast[0], self.version, code.co_consts): - i = 0 do_doc = True elif len(ast) > 1 and is_docstring(ast[1], self.version, code.co_consts): i = 1 @@ -1091,7 +1125,7 @@ def build_class(self, code): try: # FIXME: Is there an extra [0]? docstring = ast[i][0][0][0][0].pattr - except: + except Exception: docstring = code.co_consts[0] if print_docstring(self, indent, docstring): self.println() @@ -1117,7 +1151,6 @@ def build_class(self, code): # else: # print stmt[-1] - globals, nonlocals = find_globals_and_nonlocals( ast, set(), set(), code, self.version ) @@ -1161,16 +1194,13 @@ def gen_source( else: self.customize(customize) self.text = self.traverse(ast, is_lambda=is_lambda) - # In a formatted string using "lambda', we should not add "\n". + # In a formatted string using "lambda", we should not add "\n". # For example in: # f'{(lambda x:x)("8")!r}' # Adding a "\n" after "lambda x: x" will give an error message: # SyntaxError: f-string expression part cannot include a backslash - # So avoid that. 
- printfn = ( - self.write if self.in_format_string and is_lambda else self.println - ) - printfn(self.text) + # So avoid \n after writing text + self.write(self.text) self.name = old_name self.return_none = rn @@ -1182,8 +1212,8 @@ def build_ast( is_lambda=False, noneInNames=False, is_top_level_module=False, - ): - + compile_mode="exec", + ) -> GenericASTTraversal: # FIXME: DRY with fragments.py # assert isinstance(tokens[0], Token) @@ -1201,31 +1231,41 @@ def build_ast( p_insts = self.p.insts self.p.insts = self.scanner.insts self.p.offset2inst_index = self.scanner.offset2inst_index - ast = python_parser.parse(self.p, tokens, customize, code) + ast = parse(self.p, tokens, customize, code) self.customize(customize) self.p.insts = p_insts - except (python_parser.ParserError, AssertionError) as e: + except (ParserError, AssertionError) as e: raise ParserError(e, tokens, self.p.debug["reduce"]) transform_tree = self.treeTransform.transform(ast, code) self.maybe_show_tree(ast, phase="after") del ast # Save memory return transform_tree - # The bytecode for the end of the main routine has a - # "return None". However you can't issue a "return" statement in - # main. So as the old cigarette slogan goes: I'd rather switch (the token stream) - # than fight (with the grammar to not emit "return None"). + # The bytecode for the end of the main routine has a "return + # None". However, you can't issue a "return" statement in + # main. So as the old cigarette slogan goes: I'd rather switch + # (the token stream) than fight (with the grammar to not emit + # "return None"). if self.hide_internal: if len(tokens) >= 2 and not noneInNames: if tokens[-1].kind in ("RETURN_VALUE", "RETURN_VALUE_LAMBDA"): # Python 3.4's classes can add a "return None" which is # invalid syntax. 
- if tokens[-2].kind == "LOAD_CONST": - if is_top_level_module or tokens[-2].pattr is None: - del tokens[-2:] - else: - tokens.append(Token("RETURN_LAST")) + load_const = tokens[-2] + # We should have: + # LOAD_CONST None + # with *no* line number associated the token. + # A line number on the token or a non-None + # token value a token based on user source + # text. + if ( + load_const.kind == "LOAD_CONST" + and load_const.linestart is None + and load_const.attr is None + ): + # Delete LOAD_CONST (None) RETURN_VALUE + del tokens[-2:] else: tokens.append(Token("RETURN_LAST")) if len(tokens) == 0: @@ -1234,41 +1274,43 @@ def build_ast( # Build a parse tree from a tokenized and massaged disassembly. try: # FIXME: have p.insts update in a better way - # modularity is broken here + # Modularity is broken here. p_insts = self.p.insts self.p.insts = self.scanner.insts self.p.offset2inst_index = self.scanner.offset2inst_index self.p.opc = self.scanner.opc - ast = python_parser.parse(self.p, tokens, customize, code) + ast = parse(self.p, tokens, customize, code) self.p.insts = p_insts - except (python_parser.ParserError, AssertionError) as e: + except (ParserError, AssertionError) as e: raise ParserError(e, tokens, self.p.debug["reduce"]) checker(ast, False, self.ast_errors) self.customize(customize) + transform_tree = self.treeTransform.transform(ast, code) - self.maybe_show_tree(ast, phase="before") + self.maybe_show_tree(transform_tree, phase="after") del ast # Save memory return transform_tree - @classmethod - def _get_mapping(cls, node): - return MAP.get(node, MAP_DIRECT) + def _get_mapping(self, node): + return self.MAP.get(node, self.MAP_DIRECT) def code_deparse( co, out=sys.stdout, - version=None, + version: Optional[tuple] = None, debug_opts=DEFAULT_DEBUG_OPTS, code_objects={}, compile_mode="exec", is_pypy=IS_PYPY, walker=SourceWalker, -): + start_offset: int = 0, + stop_offset: int = -1, +) -> Optional[SourceWalker]: """ ingests and deparses a given code block 
'co'. If version is None, we will use the current Python interpreter version. @@ -1276,6 +1318,9 @@ def code_deparse( assert iscode(co) + if out is None: + out = sys.stdout + if version is None: version = PYTHON_VERSION_TRIPLE @@ -1286,6 +1331,21 @@ def code_deparse( co, code_objects=code_objects, show_asm=debug_opts["asm"] ) + if start_offset > 0: + for i, t in enumerate(tokens): + # If t.offset is a string, we want to skip this. + if isinstance(t.offset, int) and t.offset >= start_offset: + tokens = tokens[i:] + break + + if stop_offset > -1: + for i, t in enumerate(tokens): + # In contrast to the test for start_offset If t.offset is + # a string, we want to extract the integer offset value. + if t.off2int() >= stop_offset: + tokens = tokens[:i] + break + debug_parser = debug_opts.get("grammar", dict(PARSER_DEFAULT_DEBUG)) # Build Syntax Tree from disassembly. @@ -1309,16 +1369,17 @@ def code_deparse( tokens, customize, co, - is_lambda=(compile_mode == "lambda"), + is_lambda=is_lambda_mode(compile_mode), is_top_level_module=is_top_level_module, + compile_mode=compile_mode, ) - #### XXX workaround for profiling + # XXX workaround for profiling if deparsed.ast is None: return None # FIXME use a lookup table here. - if compile_mode == "lambda": + if is_lambda_mode(compile_mode): expected_start = "lambda_start" elif compile_mode == "eval": expected_start = "expr_start" @@ -1331,10 +1392,12 @@ def code_deparse( expected_start = None else: expected_start = None + if expected_start: - assert ( - deparsed.ast == expected_start - ), f"Should have parsed grammar start to '{expected_start}'; got: {deparsed.ast.kind}" + assert deparsed.ast == expected_start, ( + f"Should have parsed grammar start to '{expected_start}'; " + f"got: {deparsed.ast.kind}" + ) # save memory del tokens @@ -1344,16 +1407,11 @@ def code_deparse( assert not nonlocals - if version >= (3, 0): - load_op = "LOAD_STR" - else: - load_op = "LOAD_CONST" - # convert leading '__doc__ = "..." 
into doc string try: stmts = deparsed.ast - first_stmt = stmts[0][0] - if version >= 3.6: + first_stmt = stmts[0] + if version >= (3, 6): if first_stmt[0] == "SETUP_ANNOTATIONS": del stmts[0] assert stmts[0] == "sstmt" @@ -1361,13 +1419,13 @@ def code_deparse( first_stmt = stmts[0][0] pass pass - if first_stmt == ASSIGN_DOC_STRING(co.co_consts[0], load_op): + if first_stmt == "docstring": print_docstring(deparsed, "", co.co_consts[0]) del stmts[0] if stmts[-1] == RETURN_NONE: stmts.pop() # remove last node # todo: if empty, add 'pass' - except: + except Exception: pass deparsed.FUTURE_UNICODE_LITERALS = ( @@ -1379,7 +1437,7 @@ def code_deparse( deparsed.ast, name=co.co_name, customize=customize, - is_lambda=compile_mode == "lambda", + is_lambda=is_lambda_mode(compile_mode), debug_opts=debug_opts, ) @@ -1407,9 +1465,12 @@ def deparse_code2str( compile_mode="exec", is_pypy=IS_PYPY, walker=SourceWalker, -): - """Return the deparsed text for a Python code object. `out` is where any intermediate - output for assembly or tree output will be sent. + start_offset: int = 0, + stop_offset: int = -1, +) -> str: + """ + Return the deparsed text for a Python code object. `out` is where + any intermediate output for assembly or tree output will be sent. 
""" return code_deparse( code, @@ -1426,7 +1487,7 @@ def deparse_code2str( if __name__ == "__main__": def deparse_test(co): - "This is a docstring" + """This is a docstring""" s = deparse_code2str(co) # s = deparse_code2str(co, debug_opts={"asm": "after", "tree": {'before': False, 'after': False}}) print(s) diff --git a/uncompyle6/semantics/transform.py b/uncompyle6/semantics/transform.py index 9eaf80cd7..8992a2615 100644 --- a/uncompyle6/semantics/transform.py +++ b/uncompyle6/semantics/transform.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021 by Rocky Bernstein +# Copyright (c) 2019-2024 by Rocky Bernstein # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -13,14 +13,16 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -from uncompyle6.show import maybe_show_tree from copy import copy +from typing import Optional + from spark_parser import GenericASTTraversal, GenericASTTraversalPruningException -from uncompyle6.semantics.helper import find_code_node from uncompyle6.parsers.treenode import SyntaxTree from uncompyle6.scanners.tok import NoneToken, Token -from uncompyle6.semantics.consts import RETURN_NONE, ASSIGN_DOC_STRING +from uncompyle6.semantics.consts import ASSIGN_DOC_STRING, RETURN_NONE +from uncompyle6.semantics.helper import find_code_node +from uncompyle6.show import maybe_show_tree def is_docstring(node, version, co_consts): @@ -55,27 +57,34 @@ def is_docstring(node, version, co_consts): return node == ASSIGN_DOC_STRING(co_consts[0], doc_load) -def is_not_docstring(call_stmt_node): +def is_not_docstring(call_stmt_node) -> bool: try: return ( call_stmt_node == "call_stmt" and call_stmt_node[0][0] == "LOAD_STR" and call_stmt_node[1] == "POP_TOP" ) - except: + except Exception: return False class TreeTransform(GenericASTTraversal, object): - def __init__(self, version, show_ast=None, is_pypy=False): + 
def __init__( + self, + version: tuple, + is_pypy=False, + show_ast: Optional[dict] = None, + ): self.version = version self.showast = show_ast self.is_pypy = is_pypy return - def maybe_show_tree(self, ast): - if isinstance(self.showast, dict) and self.showast: - maybe_show_tree(self, ast) + def maybe_show_tree(self, tree): + if isinstance(self.showast, dict) and ( + self.showast.get("before") or self.showast.get("after") + ): + maybe_show_tree(self, tree) def preorder(self, node=None): """Walk the tree in roughly 'preorder' (a bit of a lie explained below). @@ -119,17 +128,10 @@ def n_mkfunc(self, node): mkfunc_pattr = node[-1].pattr if isinstance(mkfunc_pattr, tuple): - assert len(mkfunc_pattr, 4) and isinstance(mkfunc_pattr, int) - is_closure = node[-1].pattr[3] != 0 - else: - # FIXME: This is what we had before. It is hoaky and probably wrong. - is_closure = mkfunc_pattr == "closure" + assert isinstance(mkfunc_pattr, tuple) + assert len(mkfunc_pattr) == 4 and isinstance(mkfunc_pattr, int) - if ( - (not is_closure) - and len(code.co_consts) > 0 - and isinstance(code.co_consts[0], str) - ): + if len(code.co_consts) > 0 and isinstance(code.co_consts[0], str): docstring_node = SyntaxTree( "docstring", [Token("LOAD_STR", has_arg=True, pattr=code.co_consts[0])] ) @@ -141,7 +143,7 @@ def n_mkfunc(self, node): def n_ifstmt(self, node): """Here we check if we can turn an `ifstmt` or 'iflaststmtl` into - some kind of `assert` statement""" + some kind of `assert` statement""" testexpr = node[0] @@ -153,7 +155,11 @@ def n_ifstmt(self, node): if ifstmts_jump == "_ifstmts_jumpl" and ifstmts_jump[0] == "_ifstmts_jump": ifstmts_jump = ifstmts_jump[0] - elif ifstmts_jump not in ("_ifstmts_jump", "_ifstmts_jumpl", "ifstmts_jumpl"): + elif ifstmts_jump not in ( + "_ifstmts_jump", + "_ifstmts_jumpl", + "ifstmts_jumpl", + ): return node stmts = ifstmts_jump[0] else: @@ -213,10 +219,11 @@ def n_ifstmt(self, node): kind = "assert2not" LOAD_ASSERT = call[0].first_child() - if 
LOAD_ASSERT not in ( "LOAD_ASSERT", "LOAD_GLOBAL"): + if LOAD_ASSERT not in ("LOAD_ASSERT", "LOAD_GLOBAL"): return node if isinstance(call[1], SyntaxTree): expr = call[1][0] + assert_expr.transformed_by = "n_ifstmt" node = SyntaxTree( kind, [ @@ -226,8 +233,8 @@ def n_ifstmt(self, node): expr, RAISE_VARARGS_1, ], + transformed_by="n_ifstmt", ) - node.transformed_by = "n_ifstmt" pass pass else: @@ -255,9 +262,10 @@ def n_ifstmt(self, node): LOAD_ASSERT = expr[0] node = SyntaxTree( - kind, [assert_expr, jump_cond, LOAD_ASSERT, RAISE_VARARGS_1] + kind, + [assert_expr, jump_cond, LOAD_ASSERT, RAISE_VARARGS_1], + transformed_by="n_ifstmt", ) - node.transformed_by = ("n_ifstmt",) pass pass return node @@ -268,7 +276,7 @@ def n_ifstmt(self, node): # if elif elif def n_ifelsestmt(self, node, preprocess=False): """ - Transformation involving if..else statments. + Transformation involving if..else statements. For example @@ -294,25 +302,31 @@ def n_ifelsestmt(self, node, preprocess=False): len_n = len(n) # Sometimes stmt is reduced away and n[0] can be a single reduction like continue -> CONTINUE. 
- if len_n == 1 and isinstance(n[0], SyntaxTree) and len(n[0]) == 1 and n[0] == "stmt": + if ( + len_n == 1 + and isinstance(n[0], SyntaxTree) + and len(n[0]) == 1 + and n[0] == "stmt" + ): n = n[0][0] elif len_n == 0: return node - elif n[0].kind in ("lastc_stmt", "lastl_stmt"): + + if n[0].kind in ("lastc_stmt", "lastl_stmt"): n = n[0] - if n[0].kind in ( - "ifstmt", - "iflaststmt", - "iflaststmtl", - "ifelsestmtl", - "ifelsestmtc", - "ifpoplaststmtl", - ): - n = n[0] - if n.kind == "ifpoplaststmtl": - old_stmts = n[2] - else_suite_index = 2 - pass + + if n[0].kind in ( + "ifstmt", + "iflaststmt", + "iflaststmtl", + "ifelsestmtl", + "ifelsestmtc", + "ifpoplaststmtl", + ): + n = n[0] + if n.kind == "ifpoplaststmtl": + old_stmts = n[2] + else_suite_index = 2 pass else: if ( @@ -412,27 +426,31 @@ def n_list_for(self, list_for_node): list_for_node.transformed_by = ("n_list_for",) return list_for_node + def n_negated_testtrue(self, node): + assert node[0] == "testtrue" + test_node = node[0][0] + test_node.transformed_by = "n_negated_testtrue" + return test_node + def n_stmts(self, node): if node.first_child() == "SETUP_ANNOTATIONS": prev = node[0][0] new_stmts = [node[0]] for i, sstmt in enumerate(node[1:]): ann_assign = sstmt[0] - if ( - ann_assign == "ann_assign" - and prev == "assign" - ): + if ann_assign == "ann_assign" and prev == "assign": annotate_var = ann_assign[-2] if annotate_var.attr == prev[-1][0].attr: node[i].kind = "deleted " + node[i].kind del new_stmts[-1] ann_assign_init = SyntaxTree( - "ann_assign_init", [ann_assign[0], copy(prev[0]), annotate_var] - ) + "ann_assign_init", + [ann_assign[0], copy(prev[0]), annotate_var], + ) if sstmt[0] == "ann_assign": sstmt[0] = ann_assign_init else: - sstmt[0][0] = ann_assing_init + sstmt[0][0] = ann_assign_init sstmt[0].transformed_by = "n_stmts" pass pass @@ -446,43 +464,35 @@ def traverse(self, node, is_lambda=False): node = self.preorder(node) return node - def transform(self, ast, code): - 
self.maybe_show_tree(ast) - self.ast = copy(ast) + def transform(self, parse_tree: GenericASTTraversal, code) -> GenericASTTraversal: + self.maybe_show_tree(parse_tree) + self.ast = copy(parse_tree) + del parse_tree self.ast = self.traverse(self.ast, is_lambda=False) + n = len(self.ast) try: # Disambiguate a string (expression) which appears as a "call_stmt" at # the beginning of a function versus a docstring. Seems pretty academic, # but this is Python. - call_stmt = ast[0][0] + call_stmt = self.ast[0][0] if is_not_docstring(call_stmt): call_stmt.kind = "string_at_beginning" call_stmt.transformed_by = "transform" pass - except: + except Exception: pass try: - for i in range(len(self.ast)): - sstmt = ast[i] + for i in range(n): + sstmt = self.ast[i] if len(sstmt) == 1 and sstmt == "sstmt": self.ast[i] = self.ast[i][0] if is_docstring(self.ast[i], self.version, code.co_consts): - load_const = self.ast[i].first_child() - docstring_ast = SyntaxTree( - "docstring", - [ - Token( - "LOAD_STR", - has_arg=True, - offset=0, - attr=load_const.attr, - pattr=load_const.pattr, - ) - ], - ) + load_const = copy(self.ast[i].first_child()) + store_name = copy(self.ast[i].last_child()) + docstring_ast = SyntaxTree("docstring", [load_const, store_name]) docstring_ast.transformed_by = "transform" del self.ast[i] self.ast.insert(0, docstring_ast) @@ -491,7 +501,7 @@ def transform(self, ast, code): if self.ast[-1] == RETURN_NONE: self.ast.pop() # remove last node # todo: if empty, add 'pass' - except: + except Exception: pass return self.ast diff --git a/uncompyle6/show.py b/uncompyle6/show.py index 52b8dd5e7..376e810e8 100644 --- a/uncompyle6/show.py +++ b/uncompyle6/show.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018, 2020 Rocky Bernstein +# Copyright (C) 2018, 2020, 2023 Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/uncompyle6/util.py b/uncompyle6/util.py index 
6f01995bd..129e7666a 100644 --- a/uncompyle6/util.py +++ b/uncompyle6/util.py @@ -3,8 +3,14 @@ # More could be done here though. from math import copysign -from xdis.version_info import PYTHON_VERSION +from xdis.cross_types import UnicodeForPython3 +from xdis.version_info import PYTHON_VERSION_TRIPLE +def get_code_name(code) -> str: + code_name = code.co_name + if isinstance(code_name, UnicodeForPython3): + return code_name.value.decode("utf-8") + return code_name def is_negative_zero(n): """Returns true if n is -0.0""" @@ -36,7 +42,7 @@ def better_repr(v, version): if len(v) == 1: return "(%s,)" % better_repr(v[0], version) return "(%s)" % ", ".join(better_repr(i, version) for i in v) - elif PYTHON_VERSION < 3.0 and isinstance(v, long): + elif PYTHON_VERSION_TRIPLE < (3, 0) and isinstance(v, long): s = repr(v) if version >= 3.0 and s[-1] == "L": return s[:-1] diff --git a/uncompyle6/verify.py b/uncompyle6/verify.py index 285b2daec..33a525e9d 100755 --- a/uncompyle6/verify.py +++ b/uncompyle6/verify.py @@ -1,5 +1,5 @@ # -# (C) Copyright 2015-2018, 2020-2021 by Rocky Bernstein +# (C) Copyright 2015-2018, 2020-2021, 2023 by Rocky Bernstein # (C) Copyright 2000-2002 by hartmut Goebel # # This program is free software: you can redistribute it and/or modify @@ -185,7 +185,7 @@ def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, name=""): # print dir(code_obj1) if isinstance(code_obj1, object): # new style classes (Python 2.2) - # assume _both_ code objects to be new stle classes + # assume _both_ code objects to be new style classes assert dir(code_obj1) == dir(code_obj2) else: # old style classes @@ -205,7 +205,7 @@ def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, name=""): # should be faster and more sophisticated # if this compare fails, we use the old routine to # find out, what exactly is nor equal - # if this compare succeds, simply return + # if this compare succeeds, simply return # return pass @@ -411,7 +411,7 @@ def 
cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, name=""): check_jumps[dest1].append((i1, i2, dest2)) else: check_jumps[dest1] = [(i1, i2, dest2)] - except: + except Exception: pass i1 += 1 diff --git a/uncompyle6/version.py b/uncompyle6/version.py index 44c88a7dc..232cf9c61 100644 --- a/uncompyle6/version.py +++ b/uncompyle6/version.py @@ -14,4 +14,4 @@ # This file is suitable for sourcing inside POSIX shell as # well as importing into Python # fmt: off -__version__="3.9.0a1" # noqa +__version__="3.9.4.dev0" # noqa