diff --git a/.gitignore b/.gitignore index 3887410a..91b3a823 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +*.csv *.db *.egg-info/ *.pyc @@ -16,7 +17,9 @@ .tox MANIFEST build/* +data/ dist/* -docs/_build +docs-build/ +docs/man/ +docs/reference/ reg_settings.py -rows.1 diff --git a/Dockerfile b/Dockerfile index 18c0fc2f..51221bd7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,29 +1,22 @@ -FROM debian +FROM python:3.7 MAINTAINER Álvaro Justen -# install system dependencies +# Install system dependencies RUN apt-get update -RUN apt-get install --no-install-recommends -y build-essential git locales \ - python-dev python-lxml \ - python-pip python-snappy \ - python-thrift && \ - apt-get clean +RUN apt-get install --no-install-recommends -y \ + build-essential git locales python3-dev libsnappy-dev \ + libxml2-dev libxslt-dev libz-dev && \ + apt-get clean && \ + pip install --no-cache-dir -U pip -#thrift (used by parquet plugin) is the only which needs build-essential and -#python-dev to be installed (installing python-thrift doesn't do the job). - -#You can build other Python libraries from source by installing: -# libsnappy-dev libxml2-dev libxslt-dev libz-dev -#and not installing: -# python-lxml python-snappy - -# configure locale (needed to run tests) +# Configure locale (needed to run tests) RUN echo 'en_US.UTF-8 UTF-8' > /etc/locale.gen RUN echo 'pt_BR.UTF-8 UTF-8' >> /etc/locale.gen RUN /usr/sbin/locale-gen -# clone the repository and install Python dependencies -RUN git clone https://github.com/turicas/rows.git ~/rows -RUN cd ~/rows && pip install -r requirements-development.txt && \ - rm -rf ~/.cache/pip/ -RUN cd ~/rows && pip install -e . +# Clone the repository and install Python dependencies +RUN git clone https://github.com/turicas/rows.git /rows +RUN cd /rows && \ + git checkout master && \ + pip install --no-cache-dir -r requirements-development.txt && \ + pip install --no-cache-dir -e . 
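For reference, the development image described by this Dockerfile can be built and tried out roughly like this (the image tag and the commands run inside the container are illustrative assumptions):

```bash
# Build the development image from the repository root
cat Dockerfile | docker build -t turicas/rows:develop -

# Open a shell in a throwaway container; the Dockerfile clones the project into /rows
docker run --rm -it turicas/rows:develop bash

# Inside the container, run the test suite from the clone
cd /rows && make test
```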
diff --git a/Makefile b/Makefile index 5179cad8..0d1278cb 100644 --- a/Makefile +++ b/Makefile @@ -7,10 +7,8 @@ test: clean: find -regex '.*\.pyc' -exec rm {} \; find -regex '.*~' -exec rm {} \; - rm -rf reg-settings.py - rm -rf MANIFEST dist build *.egg-info - rm -rf rows.1 - rm -rf .tox + rm -rf reg-settings.py MANIFEST dist build *.egg-info rows.1 .tox + rm -rf docs-build docs/reference docs/man coverage erase fix-imports: @@ -32,11 +30,23 @@ lint: lint-tests: pylint tests/*.py -man: - head -1 rows.1.txt > rows.1 - txt2man rows.1.txt | egrep -v '^\.TH' >> rows.1 +docs: + make clean install + click-man --target=docs/man/ rows + pycco --directory=docs/reference --generate_index --skip-bad-files rows/*.py + pycco --directory=docs/reference/plugins --generate_index --skip-bad-files rows/plugins/*.py + mkdocs build --strict --site-dir=docs-build + rm -rf docs/man docs/reference + +docs-serve: docs + cd docs-build && python3 -m http.server + +docs-upload: docs + -git branch --delete --force --quiet gh-pages + -git push turicas :gh-pages + ghp-import --no-jekyll --message="Docs automatically built from $(shell git rev-parse HEAD)" --branch=gh-pages --push --force --remote=turicas docs-build/ release: - python setup.py bdist bdist_wheel bdist_egg upload + python setup.py bdist bdist_wheel --universal bdist_egg upload -.PHONY: test clean fix-imports lint lint-tests install uninstall man release +.PHONY: test clean docs docs-serve docs-upload fix-imports lint lint-tests install uninstall release diff --git a/README.md b/README.md index 52aaebca..59890a98 100644 --- a/README.md +++ b/README.md @@ -6,11 +6,12 @@ ![Supported Python Versions](https://img.shields.io/pypi/pyversions/rows.svg) ![Software status](https://img.shields.io/pypi/status/rows.svg) [![License: LGPLv3](https://img.shields.io/pypi/l/rows.svg)](https://github.com/turicas/rows/blob/develop/LICENSE) -[![Donate](https://img.shields.io/gratipay/turicas.svg?style=social&label=Donate)](https://www.gratipay.com/turicas) No matter in which format your tabular data is: `rows` will import it, automatically detect types and give you high-level Python objects so you can start **working with the data** instead of **trying to parse it**. It is also locale-and-unicode aware. :) -Want to learn more? [Read the documentation](http://turicas.info/rows). +Want to learn more? [Read the documentation](http://turicas.info/rows) (or +build and browse the docs locally by running `make docs-serve` after installing +`requirements-development.txt`). diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index f27f5e72..00000000 --- a/docs/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -# Makefile for Sphinx documentation -# You can set these variables from the command line. -SPHINXBUILD = sphinx-build -BUILDDIR = _build - -# Internal variables. -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees . - -help: - @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " clean remove current BUILDDIR" - @echo " publish publish to the gh-pages branch" - -clean: - rm -rf $(BUILDDIR)/* - -html: - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." - - -publish: - cd $(BUILDDIR)/html; git add . 
; git commit -m "rebuilt docs"; git push origin gh-pages - -.PHONY: help clean html publish diff --git a/docs/changelog.md b/docs/changelog.md index f299acdc..7a9f1b38 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,4 +1,30 @@ -# rows' Log of Changes +# Log of Changes + +## Version `0.4.2dev0` + +**Released on: (in development)** + +### General Changes and Enhancements +### Plugins +### Command-Line Interface +### Utils +### Bug Fixes + +## Version `0.4.1` (bugfix release) + +**Released on: 2019-02-14** + +### General Changes and Enhancements + +- Add new way to make docs (remove sphinx and uses mkdocs + click-man + pycco) +- Update Dockerfile + + +### Bug Fixes + +- [#305](https://github.com/turicas/rows/issues/305) "0" was not being + deserialized by `IntegerField` + ## Version `0.4.0` diff --git a/docs/cli.md b/docs/cli.md index 603383bb..9d827d80 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -4,7 +4,9 @@ converting and querying data. > Note: we still need to improve this documentation. Please run `rows --help` -> to see all the available commands and take a look at [rows/cli.py][rows-cli]. +> to see all the available commands, [see the code reference][cli-reference] or +> take a look at [rows/cli.py][rows-cli]. [Man pages are also +> available][cli-manpage]. ## Commands @@ -30,7 +32,7 @@ local filename (example: `rows convert https://website/file.html file.csv`). (compressed or not) in the most optimized way: using `psql`'s `COPY` command. - [`rows pgimport`][cli-pgimport]: import a CSV file (compressed or not) into a PostgreSQL table in the most optimized way: using `psql`'s `COPY` command. -- [`rows print`][cli-print]: print a table in the standard output (you can +- [`rows print`][cli-print]: print a table to the standard output (you can choose between some frame styles). - [`rows query`][cli-query]: query a table using SQL (converts the table to an in-memory SQLite database) and output to the standard output or a file. @@ -43,7 +45,7 @@ local filename (example: `rows convert https://website/file.html file.csv`). > Note: everytime we specify "compressed or not" means you can use the file as > is or a compressed version of it. The supported compression formats are: -> gzip (`.gz`), lzma (`.xz`) and bzip2 (`.bz2`). The [support for archive +> gzip (`.gz`), lzma (`.xz`) and bzip2 (`.bz2`). [Support for archive > formats such as zip, tar and rar will be implemented in the > future][issue-archives]. @@ -63,9 +65,15 @@ also have specific options. The global options are: Convert a table from a `source` URI to `destination`. Useful to convert files between formats, like extracting data from a HTML table and converting to CSV. +> Note: if you'd like to convert from/to CSV, SQLite or PostgreSQL, see the +> more optimized commands [`csv2sqlite`][cli-csv2sqlite], +> [`sqlite2csv`][cli-sqlite2csv], [`pgimport`][cli-pgimport] and +> [`pgexport`][cli-pgexport]. + Usage: `rows convert [OPTIONS] SOURCE DESTINATION` Options: + - `--input-encoding=TEXT`: Encoding of input tables (default: `utf-8`) - `--output-encoding=TEXT`: Encoding of output tables (default: `utf-8`) - `--input-locale=TEXT`: Locale of input tables. 
Used to parse integers, floats @@ -79,15 +87,19 @@ Options: - `--fields=TEXT`: A comma-separated list of fields to import (default: all fields) - `--fields-exclude=TEXT`: A comma-separated list of fields to exclude when - exporting (default: all fields) + exporting (default: none) -Example: +Examples: ```bash # needs: pip install rows[html] rows convert \ http://www.sports-reference.com/olympics/countries/BRA/summer/2016/ \ brazil-2016.csv + +rows convert \ + http://www.worldometers.info/world-population/population-by-country/ \ + population.csv ``` @@ -125,11 +137,13 @@ rows csv2sqlite \ ## `rows join` Join tables from `source` URIs using `key(s)` to group rows and save into -`destination`. +`destination`. **This command is not optimized and its use is discouraged** +([rows query][cli-query] may be more effective). Usage: `rows join [OPTIONS] KEYS SOURCES... DESTINATION` Options: + - `--input-encoding=TEXT`: Encoding of input tables (default: `utf-8`) - `--output-encoding=TEXT`: Encoding of output tables (default: `utf-8`) - `--input-locale=TEXT`: Locale of input tables. Used to parse integers, floats @@ -143,7 +157,7 @@ Options: - `--fields=TEXT`: A comma-separated list of fields to import (default: all fields) - `--fields-exclude=TEXT`: A comma-separated list of fields to exclude when - exporting (default: all fields) + exporting (default: none) Example: join `a.csv` and `b.csv` into a new file called `c.csv` using the field `id` as a key (both `a.csv` and `b.csv` must have the field `id`): @@ -214,6 +228,7 @@ are: gzip (`.gz`), lzma (`.xz`) and bzip2 (`.bz2`). Usage: `rows pgimport [OPTIONS] SOURCE DATABASE_URI TABLE_NAME` Options: + - `--input-encoding=TEXT`: Encoding of input CSV file (default: `utf-8`) - `--no-create-table=BOOLEAN`: should rows create the table or leave it to PostgreSQL? (default: false, ie: create the table) @@ -254,7 +269,7 @@ Options: - `--fields=TEXT`: A comma-separated list of fields to import (default: all fields) - `--fields-exclude=TEXT`: A comma-separated list of fields to exclude when - exporting (default: all fields) + exporting (default: none) - `--frame-style=TEXT`: frame style to "draw" the table; options: `ascii`, `single`, `double`, `none` (default: `ascii`) - `--table-index=INTEGER`: if source is HTML, specify the table index to @@ -269,6 +284,9 @@ rows print \ data/brazilian-cities.csv ``` +> Note: download [brazilian-cities.csv][br-cities]. + + ```bash # needs: pip install rows[html] rows print \ @@ -280,7 +298,7 @@ rows print \ Yep, you can SQL-query any supported file format! Each of the source files will be a table inside an in-memory SQLite database, called `table1`, ..., `tableN`. -If the `--output` is not specified, `rows` will print a table on the standard +If the `--output` is not specified, `rows` will print a table to the standard output. Usage: `rows query [OPTIONS] QUERY SOURCES...` @@ -306,11 +324,13 @@ Examples: ```bash # needs: pip install rows[html] rows query \ - 'SELECT * FROM table1 WHERE inhabitants > 1000000' \ + "SELECT * FROM table1 WHERE inhabitants > 1000000" \ data/brazilian-cities.csv \ --output=data/result.html ``` +> Note: download [brazilian-cities.csv][br-cities]. + ```bash # needs: pip install rows[pdf] rows query \ @@ -341,6 +361,7 @@ Usage: `rows schema [OPTIONS] SOURCE [OUTPUT]` Options: + - `--input-encoding=TEXT`: Encoding of input tables (default: `utf-8`) - `--input-locale=TEXT`: Locale of input tables. Used to parse integers, floats etc. 
(default: `C`) @@ -351,18 +372,18 @@ Options: - `--fields=TEXT`: A comma-separated list of fields to import (default: all fields) - `--fields-exclude=TEXT`: A comma-separated list of fields to exclude when - exporting (default: all fields) + exporting (default: none) - `--samples=INTEGER`: number of sample rows to detect schema (default: `5000`) Example: ```bash -rows schema \ - --samples=100 \ - data/brazilian-cities.csv +rows schema --samples=100 data/brazilian-cities.csv ``` +> Note: download [brazilian-cities.csv][br-cities]. + Output: ``` @@ -385,6 +406,7 @@ compression formats are: gzip (`.gz`), lzma (`.xz`) and bzip2 (`.bz2`). Usage: `rows sqlite2csv [OPTIONS] SOURCE TABLE_NAME OUTPUT` Options: + - `--batch-size=INTEGER`: number of rows to batch insert into SQLite (default: `10000`) - `--dialect=TEXT`: CSV dialect to be used on output file (default: `excel`) @@ -392,17 +414,16 @@ Options: Example: ```bash -rows sqlite2csv \ - my_db.sqlite \ - my_table \ - my_table.csv.bz2 +rows sqlite2csv my_db.sqlite my_table my_table.csv.bz2 ``` ## `rows sum` -Sum tables from `source` URIs and save into `destination`. The tables must have -the same fields. +Sum tables (append rows from one to the other) from `source` URIs and save into +`destination`. The tables must have the same fields. **This command is not +optimized and its use is discouraged** ([rows query][cli-query] may be more +effective). Usage: `rows sum [OPTIONS] SOURCES... DESTINATION` @@ -421,7 +442,7 @@ Options: - `--fields=TEXT`: A comma-separated list of fields to import (default: all fields) - `--fields-exclude=TEXT`: A comma-separated list of fields to exclude when - exporting (default: all fields) + exporting (default: none) Example: @@ -434,16 +455,19 @@ rows sum \ ``` +[br-cities]: https://gist.github.com/turicas/ec0abcfe0d7abf7a97ef7a0c1d72c7f7 [cli-convert]: #rows-convert [cli-csv2sqlite]: #rows-csv2sqlite [cli-join]: #rows-join +[cli-manpage]: man/rows.1 [cli-pdf-to-text]: #rows-pdf-to-text [cli-pgexport]: #rows-pgexport [cli-pgimport]: #rows-pgimport [cli-print]: #rows-print [cli-query]: #rows-query +[cli-reference]: reference/cli.html [cli-schema]: #rows-schema [cli-sqlite2csv]: #rows-sqlite2csv [cli-sum]: #rows-sum [issue-archives]: https://github.com/turicas/rows/issues/236 -[rows-cli]: https://github.com/turicas/rows/blob/develop/rows/cli.py +[rows-cli]: https://github.com/turicas/rows/blob/master/rows/cli.py diff --git a/docs/conf.py b/docs/conf.py deleted file mode 100644 index e0c3af14..00000000 --- a/docs/conf.py +++ /dev/null @@ -1,344 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# -# rows documentation build configuration file, created by -# sphinx-quickstart on Tue Oct 18 12:11:09 2016. -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. 
-# -# import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) - - -from recommonmark.parser import CommonMarkParser -from recommonmark.transform import AutoStructify - -# -- General configuration ------------------------------------------------ - -# If your documentation needs a minimal Sphinx version, state it here. -# -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode"] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# -# source_suffix = ['.rst', '.md'] - -source_parsers = {".md": CommonMarkParser} -source_suffix = [".rst", ".md"] - -# The encoding of source files. -source_encoding = "utf-8-sig" - -# The master toctree document. -master_doc = "index" - -# General information about the project. -project = "rows" -copyright = "2014-2019, Álvaro Justen" -author = "Álvaro Justen" - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = "0.4" -# The full version, including alpha/beta/rc tags. -release = "0.4.0" - -github_doc_root = "https://github.com/turicas/rows/tree/master/docs/" - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -# -# today = '' -# -# Else, today_fmt is used as the format for a strftime call. -# -# today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] - -# The reST default role (used for this markup: `text`) to use for all -# documents. -# -# default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -# -# add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -# -# add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -# -# show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = "sphinx" - -# A list of ignored prefixes for module index sorting. -# modindex_common_prefix = [] - -# If true, keep warnings as "system message" paragraphs in the built documents. -# keep_warnings = False - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = False - - -# -- Options for HTML output ---------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = "alabaster" - -# Theme options are theme-specific and customize the look and feel of a theme -# further. 
For a list of options available for each theme, see the -# documentation. -# -# html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -# html_theme_path = [] - -# The name for this set of Sphinx documents. -# " v documentation" by default. -# -# html_title = 'rows v0.4.0' - -# A shorter title for the navigation bar. Default is the same as html_title. -# -# html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -# -# html_logo = None - -# The name of an image file (relative to this directory) to use as a favicon of -# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -# -# html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -# html_static_path = ['_static'] - -# Add any extra paths that contain custom files (such as robots.txt or -# .htaccess) here, relative to this directory. These files are copied -# directly to the root of the documentation. -# -# html_extra_path = [] - -# If not None, a 'Last updated on:' timestamp is inserted at every page -# bottom, using the given strftime format. -# The empty string is equivalent to '%b %d, %Y'. -# -# html_last_updated_fmt = None - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -# -# html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -# -# html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -# -# html_additional_pages = {} - -# If false, no module index is generated. -# -# html_domain_indices = True - -# If false, no index is generated. -# -# html_use_index = True - -# If true, the index is split into individual pages for each letter. -# -# html_split_index = False - -# If true, links to the reST sources are added to the pages. -# -# html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -# -# html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -# -# html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -# -# html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -# html_file_suffix = None - -# Language to be used for generating the HTML full-text search index. -# Sphinx supports the following languages: -# 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' -# 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' -# -html_search_language = "en" - -# A dictionary with options for the search language support, empty by default. -# 'ja' uses this config value. -# 'zh' user can custom change `jieba` dictionary path. -# -# html_search_options = {'type': 'default'} - -# The name of a javascript file (relative to the configuration directory) that -# implements a search results scorer. If empty, the default will be used. -# -# html_search_scorer = 'scorer.js' - -# Output file base name for HTML help builder. 
-htmlhelp_basename = "rowsdoc" - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -# -# latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -# -# latex_use_parts = False - -# If true, show page references after internal links. -# -# latex_show_pagerefs = False - -# If true, show URL addresses after external links. -# -# latex_show_urls = False - -# Documents to append as an appendix to all manuals. -# -# latex_appendices = [] - -# It false, will not define \strong, \code, itleref, \crossref ... but only -# \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added -# packages. -# -# latex_keep_old_macro_names = True - -# If false, no module index is generated. -# -# latex_domain_indices = True - - -# -- Options for manual page output --------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [(master_doc, "rows", "rows Documentation", [author], 1)] - -# If true, show URL addresses after external links. -# -# man_show_urls = False - - -# -- Options for Texinfo output ------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - ( - master_doc, - "rows", - "rows Documentation", - author, - "rows", - "A common, beautiful interface to tabular data, no matter the format", - "Miscellaneous", - ) -] - -# Documents to append as an appendix to all manuals. -# -# texinfo_appendices = [] - -# If false, no module index is generated. -# -# texinfo_domain_indices = True - -# How to display URL addresses: 'footnote', 'no', or 'inline'. -# -# texinfo_show_urls = 'footnote' - -# If true, do not generate a @detailmenu in the "Top" node's menu. -# -# texinfo_no_detailmenu = False - - -def setup(app): - app.add_config_value( - "recommonmark_config", - { - # when True, Auto Toc Tree will only be enabled on section that matches the title. - "auto_toc_tree_section": "Contents", - # a function that maps a existing relative position in the document to a http link - "url_resolver": lambda url: github_doc_root + url, - # enable the Auto Toc Tree feature - "enable_auto_toc_tree": True, - # enable the Auto Doc Ref feature - "enable_auto_doc_ref": True, - # enable the Math Formula feature - "enable_math": True, - # enable the Inline Math feature - "enable_inline_math": True, - # enable the evaluate embedded reStructuredText feature - "enable_eval_rst": True, - }, - True, - ) - app.add_transform(AutoStructify) diff --git a/docs/contributing.md b/docs/contributing.md index 452f2bd1..245d6891 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -86,20 +86,33 @@ nosetests -dsv --with-yanc --with-coverage --cover-package rows tests/tests_plug ``` -## Generating the manual +## Generating the documentation -To create the man page you'll need to install [txt2man][txt2man]. In Debian -(and Debian-based distributions) you can install by running: +Just run: ```bash -apt install txt2man +make docs ``` -Then, you can generate the `rows.1` file by running: +And check the `docs-build/` directory. 
+ +You can also serve it via HTTP: ```bash -make man +make docs-serve ``` -[txt2man]: http://mvertes.free.fr/ +## Releasing new versions + +``` +# X = next version number +git checkout -b release/X +# update docs/changelog.md & commit +# change version number in `setup.py` and `rows/__init__.py` & commit +git checkout master && git merge --no-ff release/X +git tag -a X +git branch -d release/X +make release +make docs-upload +``` diff --git a/docs/index.md b/docs/index.md index 1480a0e0..18788f2f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -18,16 +18,9 @@ work on other versions too). > us][rows-issue-103]! :-) -## Core Values - -- Simple, easy and flexible to use -- Code quality -- Don't Repeat Yourself - - ## Contents -- [Installation][doc-installing] +- [Installation][doc-installation] - [Quick-start guide][doc-quick-start] - [Command-line interface][doc-cli] - [Supported plugins][doc-plugins] @@ -36,6 +29,7 @@ work on other versions too). - [Contributing][doc-contributing] - [Useful links][doc-links] - [Log of changes][doc-changelog] +- [Code reference][reference] ## Basic Usage @@ -68,15 +62,18 @@ anything: ```python import rows -cities = rows.import_from_csv('data/brazilian-cities.csv') -rio_biggest_cities = [city for city in cities - if city.state == 'RJ' and - city.inhabitants > 500000] +cities = rows.import_from_csv("data/brazilian-cities.csv") +rio_biggest_cities = [ + city for city in cities + if city.state == "RJ" and city.inhabitants > 500000 +] for city in rio_biggest_cities: - print('{} ({:5.2f} ppl/km²)'.format(city.city, - city.inhabitants / city.area)) + density = city.inhabitants / city.area + print(f"{city.city} ({density:5.2f} ppl/km²)") ``` +> Note: download [brazilian-cities.csv][br-cities]. + The result: ```text @@ -91,14 +88,19 @@ command-line interface for more common tasks. For more examples, please refer to our [quick-start guide][doc-quick-start]. +> Note: `rows` is still not lazy by default, except for some operations like +> `csv2sqlite`, `sqlite2csv`, `pgimport` and `pgexport` (so using +> `rows.import_from_X` will put everything in memory), [we're working on +> this][rows-lazyness]. + ## Architecture The library is composed by: - A common interface to tabular data (the `Table` class) -- A set of plugins to populate `Table` objects (CSV, XLS, XLSX, HTML and XPath, - Parquet, TXT, JSON, SQLite -- more coming soon!) +- A set of plugins to populate `Table` objects from formats like CSV, XLS, + XLSX, HTML and XPath, Parquet, PDF, TXT, JSON, SQLite; - A set of common fields (such as `BoolField`, `IntegerField`) which know exactly how to serialize and deserialize data for each object type you'll get - A set of utilities (such as field type recognition) to help working with @@ -110,7 +112,8 @@ The library is composed by: ## Semantic Versioning `rows` uses [semantic versioning][semver]. Note that it means we do not -guarantee API backwards compatibility on `0.x.y` versions. +guarantee API backwards compatibility on `0.x.y` versions (but we try our best +to). ## License @@ -119,16 +122,19 @@ This library is released under the [GNU Lesser General Public License version 3][lgpl3].
+[br-cities]: https://gist.github.com/turicas/ec0abcfe0d7abf7a97ef7a0c1d72c7f7 +[doc-changelog]: changelog.md [doc-cli]: cli.md [doc-contributing]: contributing.md -[doc-installing]: installing.md +[doc-installation]: installation.md [doc-links]: links.md [doc-locale]: locale.md [doc-operations]: operations.md [doc-plugins]: plugins.md -[doc-changelog]: changelog.md [doc-quick-start]: quick-start.md [lgpl3]: http://www.gnu.org/licenses/lgpl-3.0.html +[reference]: reference/ [rows-issue-103]: https://github.com/turicas/rows/issues/103 +[rows-lazyness]: https://github.com/turicas/rows/issues/45 [rows]: https://github.com/turicas/rows/ [semver]: http://semver.org/ diff --git a/docs/installing.md b/docs/installation.md similarity index 62% rename from docs/installing.md rename to docs/installation.md index 15b4064c..d95a358e 100644 --- a/docs/installing.md +++ b/docs/installation.md @@ -1,16 +1,17 @@ -# Installing rows +# Installation -Directly from [PyPI][pypi-rows]: +## [PyPI][pypi-rows] ```bash pip install rows ``` -You can also install directly from the GitHub repository to have the newest -features (not pretty stable) by running: +## GitHub ```bash -pip install git+https://github.com/turicas/rows.git@develop#egg=rows +pip install "https://github.com/turicas/rows/archive/develop.zip#egg=rows" +# or (needs git) +pip install "git+https://github.com/turicas/rows.git@develop#egg=rows" ``` or: @@ -29,6 +30,44 @@ You can create a development image using Docker: cat Dockerfile | docker build -t turicas/rows:latest - ``` +## Debian + +If you use Debian [sid][debian-sid] or [testing][debian-testing] you can +install it directly from the main repository by running: + +```bash +apt install python-rows # Python library only +apt install rows # Python library + CLI +``` + +You may need to install SQLite too (on Ubuntu, for example). + + +## Fedora + +```bash +dnf install python-row # Python library + CLI +``` + + +## Docker + +If you don't want to install on your machine but you'd like to try the library, +there's a docker image available: + +```bash +mkdir -p data # Put your files here +echo -e "a,b\n1,2\n3,4" > data/test.csv + +# To access the IPython shell: +docker run --rm -it -v $(pwd)/data:/data turicas/rows:0.4.0 ipython + +# To access the command-line interface +docker run --rm -it -v $(pwd)/data:/data turicas/rows:0.4.0 rows print /data/test.csv +``` + +## Installing plugins + The plugins `csv`, `dicts`, `json`, `sqlite` and `txt` are built-in by default but if you want to use another one you need to explicitly install its dependencies, for example: @@ -55,20 +94,8 @@ requirement: pip install rows[all] ``` -If you use Debian [sid][debian-sid] or [testing][debian-testing] you can -install it directly from the main repository by running: - -```bash -apt install python-rows # Python library only -apt install rows # Python library + CLI -``` - -And in Fedora: - -```bash -dnf install python-row # Python library + CLI -``` - -You may need to install SQLite too (on Ubuntu, for example). 
+[debian-sid]: https://www.debian.org/releases/sid/ +[debian-testing]: https://www.debian.org/releases/testing/ [pypi-rows]: https://pypi.org/project/rows/ +[rows-cli]: cli.md diff --git a/docs/links.md b/docs/links.md index 10a62f1d..163424d4 100644 --- a/docs/links.md +++ b/docs/links.md @@ -1,8 +1,9 @@ -# Links +# Useful Links ## Showcase +- (Portuguese) [How rows is helping make Brazilian data more accessible][brasilio-talk-pt] - (Portuguese) [Talk (videos + slides) on rows by Álvaro Justen][rows-talk-pt] @@ -69,3 +70,4 @@ [rows-showcase-source]: https://github.com/leonardocsantoss/django-rows [rows-showcase]: http://rows.irdx.com.br/ [rows-talk-pt]: http://blog.justen.eng.br/2016/05/dados-tabulares-a-maneira-pythonica.html +[brasilio-talk-pt]: https://www.youtube.com/watch?v=MZZFmucRxoY diff --git a/docs/locale.md b/docs/locale.md index 4d78ab38..e5164566 100644 --- a/docs/locale.md +++ b/docs/locale.md @@ -14,14 +14,14 @@ import requests import rows from io import BytesIO -url = 'http://cidades.ibge.gov.br/comparamun/compara.php?idtema=1&codv=v01&coduf=33' +url = "http://cidades.ibge.gov.br/comparamun/compara.php?idtema=1&codv=v01&coduf=33" html = requests.get(url).content -with rows.locale_context(name='pt_BR.UTF-8', category=locale.LC_NUMERIC): +with rows.locale_context(name="pt_BR.UTF-8", category=locale.LC_NUMERIC): rio = rows.import_from_html(BytesIO(html)) total_population = sum(city.pessoas for city in rio) # 'pessoas' is the fieldname related to the number of people in each city -print('Rio de Janeiro has {} inhabitants'.format(total_population)) +print(f"Rio de Janeiro has {total_population} inhabitants") ``` The column `pessoas` will be imported as an `IntegerField` and the result is: @@ -32,9 +32,16 @@ Rio de Janeiro has 15989929 inhabitants ## Locale dependency -`rows.locale_context` depends on your operational system locales to work. In order to successfully use this context manager make sure the desired locale is available in a system level. +`rows.locale_context` depends on your operational system locales to work. In +order to successfully use this context manager make sure the desired locale is +available in a system level. For example, for Debian based systems: 1. Make sure the desired locale is present and uncommented `/etc/locale.gen` 2. Run `locale-gen` + +For more information [see the code reference][locale-reference]. + + +[locale-reference]: reference/localization.html diff --git a/docs/make.bat b/docs/make.bat deleted file mode 100644 index 50664222..00000000 --- a/docs/make.bat +++ /dev/null @@ -1,281 +0,0 @@ -@ECHO OFF - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set BUILDDIR=_build -set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . -set I18NSPHINXOPTS=%SPHINXOPTS% . -if NOT "%PAPER%" == "" ( - set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% - set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% -) - -if "%1" == "" goto help - -if "%1" == "help" ( - :help - echo.Please use `make ^` where ^ is one of - echo. html to make standalone HTML files - echo. dirhtml to make HTML files named index.html in directories - echo. singlehtml to make a single large HTML file - echo. pickle to make pickle files - echo. json to make JSON files - echo. htmlhelp to make HTML files and a HTML help project - echo. qthelp to make HTML files and a qthelp project - echo. devhelp to make HTML files and a Devhelp project - echo. epub to make an epub - echo. epub3 to make an epub3 - echo. 
latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter - echo. text to make text files - echo. man to make manual pages - echo. texinfo to make Texinfo files - echo. gettext to make PO message catalogs - echo. changes to make an overview over all changed/added/deprecated items - echo. xml to make Docutils-native XML files - echo. pseudoxml to make pseudoxml-XML files for display purposes - echo. linkcheck to check all external links for integrity - echo. doctest to run all doctests embedded in the documentation if enabled - echo. coverage to run coverage check of the documentation if enabled - echo. dummy to check syntax errors of document sources - goto end -) - -if "%1" == "clean" ( - for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i - del /q /s %BUILDDIR%\* - goto end -) - - -REM Check if sphinx-build is available and fallback to Python version if any -%SPHINXBUILD% 1>NUL 2>NUL -if errorlevel 9009 goto sphinx_python -goto sphinx_ok - -:sphinx_python - -set SPHINXBUILD=python -m sphinx.__init__ -%SPHINXBUILD% 2> nul -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -:sphinx_ok - - -if "%1" == "html" ( - %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/html. - goto end -) - -if "%1" == "dirhtml" ( - %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. - goto end -) - -if "%1" == "singlehtml" ( - %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. - goto end -) - -if "%1" == "pickle" ( - %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can process the pickle files. - goto end -) - -if "%1" == "json" ( - %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can process the JSON files. - goto end -) - -if "%1" == "htmlhelp" ( - %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can run HTML Help Workshop with the ^ -.hhp project file in %BUILDDIR%/htmlhelp. - goto end -) - -if "%1" == "qthelp" ( - %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can run "qcollectiongenerator" with the ^ -.qhcp project file in %BUILDDIR%/qthelp, like this: - echo.^> qcollectiongenerator %BUILDDIR%\qthelp\rows.qhcp - echo.To view the help file: - echo.^> assistant -collectionFile %BUILDDIR%\qthelp\rows.ghc - goto end -) - -if "%1" == "devhelp" ( - %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. - goto end -) - -if "%1" == "epub" ( - %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The epub file is in %BUILDDIR%/epub. 
- goto end -) - -if "%1" == "epub3" ( - %SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3 - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The epub3 file is in %BUILDDIR%/epub3. - goto end -) - -if "%1" == "latex" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. - goto end -) - -if "%1" == "latexpdf" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - cd %BUILDDIR%/latex - make all-pdf - cd %~dp0 - echo. - echo.Build finished; the PDF files are in %BUILDDIR%/latex. - goto end -) - -if "%1" == "latexpdfja" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - cd %BUILDDIR%/latex - make all-pdf-ja - cd %~dp0 - echo. - echo.Build finished; the PDF files are in %BUILDDIR%/latex. - goto end -) - -if "%1" == "text" ( - %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The text files are in %BUILDDIR%/text. - goto end -) - -if "%1" == "man" ( - %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The manual pages are in %BUILDDIR%/man. - goto end -) - -if "%1" == "texinfo" ( - %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. - goto end -) - -if "%1" == "gettext" ( - %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The message catalogs are in %BUILDDIR%/locale. - goto end -) - -if "%1" == "changes" ( - %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes - if errorlevel 1 exit /b 1 - echo. - echo.The overview file is in %BUILDDIR%/changes. - goto end -) - -if "%1" == "linkcheck" ( - %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck - if errorlevel 1 exit /b 1 - echo. - echo.Link check complete; look for any errors in the above output ^ -or in %BUILDDIR%/linkcheck/output.txt. - goto end -) - -if "%1" == "doctest" ( - %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest - if errorlevel 1 exit /b 1 - echo. - echo.Testing of doctests in the sources finished, look at the ^ -results in %BUILDDIR%/doctest/output.txt. - goto end -) - -if "%1" == "coverage" ( - %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage - if errorlevel 1 exit /b 1 - echo. - echo.Testing of coverage in the sources finished, look at the ^ -results in %BUILDDIR%/coverage/python.txt. - goto end -) - -if "%1" == "xml" ( - %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The XML files are in %BUILDDIR%/xml. - goto end -) - -if "%1" == "pseudoxml" ( - %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. - goto end -) - -if "%1" == "dummy" ( - %SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. Dummy builder generates no files. 
- goto end) :end diff --git a/docs/operations.md b/docs/operations.md index a5389778..593662eb 100644 --- a/docs/operations.md +++ b/docs/operations.md @@ -1,4 +1,4 @@ -# `Table` operations +# Table operations The module `rows.operations` contains some operations you can do on your `Table` objects: @@ -13,5 +13,7 @@ The module `rows.operations` contains some operations you can do on your transformation function. - `rows.operations.transpose`: transpose the `Table` based on a specific field. +For more details [see the reference][operations-reference]. -[rows-cli-query]: https://github.com/turicas/rows/blob/develop/rows/cli.py#L291 +[rows-cli-query]: https://github.com/turicas/rows/blob/master/rows/cli.py#L291 +[operations-reference]: reference/operations.html diff --git a/docs/plugins.md b/docs/plugins.md index a3a1ce31..6971dfc4 100644 --- a/docs/plugins.md +++ b/docs/plugins.md @@ -1,13 +1,12 @@ -# Plugins +# Supported Plugins -The idea behing plugins is very simple: you write a little piece of code which -extracts data from/to some specific format and the library will do the other -tasks for you, such as detecting and converting data types. So writing a plugin -is as easy as reading from/writing to the file format you want. If you don't -find the plugin for the format you need, feel free [to -contribute][doc-contributing]. :-) +The idea behind plugins is very simple: it's a piece of code which extracts +data from/exports to some specific format and interfaces with the core library +functions, which will know how to detect and convert data types, export to +other formats etc. If you don't find the plugin for the format you need, feel +free [to contribute][doc-contributing]. :-) -Each `import_from_*` function receive specific parameters (depending on the +Each `import_from_X` function receives specific parameters (depending on the format you're working) but also general parameters such as `skip_header` and `fields` (they are passed to the [rows.plugins.utils.create_table function][create-table-function]). @@ -23,8 +22,28 @@ see the plugins in action. :) Current implemented plugins: +- [CSV][section-csv] +- [List of dicts][section-dicts] +- [HTML][section-html] +- [JSON][section-json] +- [ODS][section-ods] +- [Parquet][section-parquet] +- [PDF][section-pdf] +- [PostgreSQL][section-postgresql] +- [SQLite][section-sqlite] +- [TXT][section-txt] +- [XLS][section-xls] +- [XLSX][section-xlsx] +- [XPath][section-xpath] + +> Note: `rows` is still not lazy by default, except for some operations like +> `csv2sqlite`, `sqlite2csv`, `pgimport` and `pgexport` (so using +> `rows.import_from_X` will put everything in memory), [we're working on +> this][rows-lazyness]. + ## CSV +[See code reference][reference-csv] Use `rows.import_from_csv` and `rows.export_to_csv` (dependencies are installed by default). The CSV dialect is **detected automatically** but you can specify @@ -44,13 +63,8 @@ Learn by example: - [`examples/library/usa_legislators.py`][example-legislators] -## TXT - -Use `rows.import_from_txt` and `rows.export_to_txt` (no dependencies). You can -customize the border style. - - ## List of dicts +[See code reference][reference-dicts] Use `rows.import_from_dicts` and `rows.export_to_dicts` (no dependencies). Useful when you have the data in memory and would like to detect/convert data @@ -61,20 +75,14 @@ Learn by example: - [`examples/library/organizaciones.py`][example-organizaciones] -## JSON - -Use `rows.import_from_json` and `rows.export_to_json` (no dependencies).
Each -table is converted to an array of objects (where each row is represented by an -object). - - ## HTML +[See code reference][reference-html] Use `rows.import_from_html` and `rows.export_to_html` (dependencies must be -installed with `pip install rows[html]`). You can pass the table index (in case -there's more than one `` inside the HTML) and decide to kee the HTML -inside the `
` tags (useful to extract links and data inside HTML -properties). +installed with `pip install rows[html]`). You can specify the table index in +case there's more than one `` inside the HTML, decide whether to keep +the HTML code inside the `
` tags (useful to extract links and "hidden" +data) and other options. Very useful in Web scraping. Learn by example: @@ -93,25 +101,23 @@ Helper functions: HTML (returns a list of strings). -## XPath +## JSON +[See code reference][reference-json] -Dependencies must be installed with `pip install rows[xpath]`). Use -`rows.import_from_xpath` passing the following arguments: +Use `rows.import_from_json` and `rows.export_to_json` (no dependencies). Each +table is converted to an array of objects (where each row is represented by an +object). -- `filename_or_fobj`: source XML/HTML; -- `rows_xpath`: each result must represent a row in the new returned `Table`; -- `fields_xpath`: must be an `collections.OrderedDict`, where the key is the - field name and the value the XPath to extract the desired value for this - field (you'll probrably want to use `./` so it'll search inside the row found - by `rows_xpath`). -Learn by example: +## ODS +[See code reference][reference-ods] -- [`examples/library/ecuador_radiodifusoras.py`][example-radiodifusoras] -- [`examples/library/brazilian_cities_wikipedia.py`][example-br-cities] +Use `rows.import_from_ods` (dependencies must be installed with `pip install +rows[ods]`). ## Parquet +[See code reference][reference-parquet] Use `rows.import_from_parquet` passing the filename (dependencies must be installed with `pip install rows[parquet]` and if the data is compressed using @@ -121,11 +127,11 @@ more details and one example. ## PDF +[See code reference][reference-pdf] Use `rows.import_from_pdf` (dependencies must be installed with `pip install rows[pdf]`). - ### PDF Parser Backend There are two available backends (under-the-hood libraries to parse the PDF), @@ -146,12 +152,10 @@ You can specify some parameters to delimit where the table is located in the PDF, like: - `starts_after` and `ends_before`: delimits the objects before/after the - table. Can be: - - Regular strings (exact match); - - Regular expressions; - - Functions (receives the object and must return `True` for the object which - define if the table starts/ends there). -- `page_numbers`: sequence with desired page numbers (starts from `1`); + table. Can be: regular strings (exact match); regular expressions objects; or + functions (receives the object and must return `True` for the object which + define if the table starts/ends there). +- `page_numbers`: sequence with desired page numbers (starts from `1`). ### Specify Detection Algorithms @@ -164,7 +168,9 @@ programatically with `rows.plugins.pdf.algorithms()`): - `rows.plugins.pdf.YGroupsAlgorithm`: default, group text objects by y position and identify table lines based on these groups. -- `rows.plugins.pdf.HeaderPositionAlgorithm`: +- `rows.plugins.pdf.HeaderPositionAlgorithm`: use the table header to identify + cell positions and then fill the table with found objects (useful in sparse + tables). - `rows.plugins.pdf.RectsBoundariesAlgorithm`: detect the table boundaries by the rectangles on the page (currently only available using the `'pdfminer'` backend, which is very slow). @@ -185,35 +191,44 @@ programatically with `rows.plugins.pdf.algorithms()`): ### Examples -- [`balneabilidade-bahia`][example-balneabilidade]: scraping project (downloads - 1400+ PDFs from a Brazilian organization which monitors water quality, then - extract the tables in each PDF and put all rows together in one CSV). 
+- [`balneabilidade-brasil`][example-balneabilidade]: downloads thousands of + PDFs from Brazilian organizations which monitors water quality, then extract + the tables in each PDF and put all rows together in one CSV; - [`examples/cli/extract-pdf.sh`][example-pdf-cli]: PDF extraction using the command-line interface (the parameters cannot be customized using this method by now -- more improvements in next versions). -## XLS +## PostgreSQL +[See code reference][reference-postgresql] -Use `rows.import_from_xls` and `rows.export_to_xls` (dependencies must be -installed with `pip install rows[xls]`). You can customize things like -`sheet_name`, `sheet_index`, `start_row`, `end_row`, `start_column` and -`end_column` (the last 5 options are indexes and starts from 0). +Use `rows.import_from_postgresql` and `rows.export_to_postgresql` (dependencies +must be installed with `pip install rows[postgresql]`). -On `rows.export_to_xls` you can define the `sheet_name`. +### Parameters +On both `rows.import_from_postgresql` and `rows.export_to_postgresql` you can pass +either a connection string or a `psycopg2` connection object. -## XLSX +On `rows.import_from_postgresql` you can pass a `query` parameter instead of a +`table_name`. -use `rows.import_from_xlsx` and `rows.export_to_xlsx` (dependencies must be -installed with `pip install rows[xlsx]`). You can customize things like -`sheet_name`, `sheet_index`, `start_row`, `end_row`, `start_column` and -`end_column` (the last 5 options are indexes and starts from 0). +### Helper Functions -On `rows.export_to_xlsx` you can define the `sheet_name`. +- `rows.utils.pgimport`: import data from CSV into PostgreSQL using the fastest + possible method - requires the `psql` command available on your system (the + command-line version of this function is pretty useful -- see more by running + `rows pgimport --help`). The CSV can be optionally compressed (`.csv`, + `.csv.gz` and `.csv.xz`); +- `rows.utils.pgexport`: export data from PostgreSQL into a CSV file using the + fastest possible method - requires the `psql` command available on your + system (the command-line version of this function is pretty useful -- see + more by running `rows pgexport --help`). The CSV can be optionally compressed + (`.csv`, `.csv.gz` and `.csv.xz`). ## SQLite +[See code reference][reference-sqlite] Use `rows.import_from_sqlite` and `rows.export_to_sqlite` (no dependencies). @@ -225,49 +240,91 @@ Helper functions: `.csv.gz` and `.csv.xz`). -## PostgreSQL +## TXT +[See code reference][reference-txt] -Use `rows.import_from_postgresql` and `rows.export_to_postgresql` (dependencies -must be installed with `pip install rows[postgresql]`). +Use `rows.import_from_txt` and `rows.export_to_txt` (no dependencies). You can +customize the border style. -### Parameters -On both `rows.import_from_postgresql` and `rows.export_to_postgresql` you can pass -either a connection string or a `psycopg2` connection object. +## XLS +[See code reference][reference-xls] -On `rows.import_from_postgresql` you can pass a `query` parameter instead of a -`table_name`. +Use `rows.import_from_xls` and `rows.export_to_xls` (dependencies must be +installed with `pip install rows[xls]`). You can customize things like +`sheet_name`, `sheet_index`, `start_row`, `end_row`, `start_column` and +`end_column` (the last 5 options are indexes and starts from 0). -### Helper Functions +On `rows.export_to_xls` you can define the `sheet_name`. 
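To make the XLS options above concrete, here is a minimal sketch (the file names, sheet name and row offset are illustrative assumptions):

```python
import rows

# Read a specific sheet, skipping one title row above the header
# (sheet_index, start_row, end_row, start_column and end_column are 0-based)
table = rows.import_from_xls(
    "data/report.xls",
    sheet_name="2019",
    start_row=1,
)

# Export it back, naming the destination sheet
rows.export_to_xls(table, "data/report-copy.xls", sheet_name="Copy")
```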
-- `rows.utils.pgimport`: import data from CSV into PostgreSQL using the fastest - possible method - requires the `psql` command available on your system (the - command-line version of this function is pretty useful -- see more by running - `rows pgimport --help`). The CSV can be optionally compressed (`.csv`, - `.csv.gz` and `.csv.xz`); -- `rows.utils.pgexport`: export data from PostgreSQL into a CSV file using the - fastest possible method - requires the `psql` command available on your - system (the command-line version of this function is pretty useful -- see - more by running `rows pgexport --help`). The CSV can be optionally compressed - (`.csv`, `.csv.gz` and `.csv.xz`). +## XLSX +[See code reference][reference-xlsx] -## ODS +use `rows.import_from_xlsx` and `rows.export_to_xlsx` (dependencies must be +installed with `pip install rows[xlsx]`). You can customize things like +`sheet_name`, `sheet_index`, `start_row`, `end_row`, `start_column` and +`end_column` (the last 5 options are indexes and starts from 0). -Use `rows.import_from_ods` (dependencies must be installed with `pip install -rows[ods]`). +On `rows.export_to_xlsx` you can define the `sheet_name`. + + +## XPath +[See code reference][reference-xpath] + +Dependencies must be installed with `pip install rows[xpath]`). Very useful in +Web scraping. Use `rows.import_from_xpath` passing the following arguments: + +- `filename_or_fobj`: source XML/HTML; +- `rows_xpath`: XPath to find the elements which will be transformed into rows; +- `fields_xpath`: `collections.OrderedDict` containing XPaths for each of the + fields (key: field name, value: XPath string) - you'll probrably want to use + `./` so it'll search inside the row found by `rows_xpath`). + +Learn by example: + +- [`examples/library/ecuador_radiodifusoras.py`][example-radiodifusoras] +- [`examples/library/brazilian_cities_wikipedia.py`][example-br-cities] [blog-rows-parquet]: http://blog.justen.eng.br/2016/03/reading-parquet-files-in-python-with-rows.html -[create-table-function]: https://github.com/turicas/rows/blob/develop/rows/utils.py -[plugins-source]: https://github.com/turicas/rows/tree/develop/rows/plugins -[examples]: https://github.com/turicas/rows/tree/develop/examples/library -[example-airports]: https://github.com/turicas/rows/blob/develop/examples/library/airports.py -[example-radiodifusoras]: https://github.com/turicas/rows/blob/develop/examples/library/ecuador_radiodifusoras.py -[example-br-cities]: https://github.com/turicas/rows/blob/develop/examples/library/brazilian_cities_wikipedia.py -[example-extract-links]: https://github.com/turicas/rows/blob/develop/examples/library/extract_links.py -[example-organizaciones]:[https://github.com/turicas/rows/blob/develop/examples/library/organizaciones.py] -[example-slip-opinions]: https://github.com/turicas/rows/blob/develop/examples/library/slip_opinions.py -[example-legislators]: https://github.com/turicas/rows/blob/develop/examples/library/usa_legislators.py +[create-table-function]: https://github.com/turicas/rows/blob/master/rows/utils.py +[doc-contributing]: contributing.md +[example-airports]: https://github.com/turicas/rows/blob/master/examples/library/airports.py [example-balneabilidade]: https://github.com/Correio24horas/balneabilidade-bahia -[example-pdf-cli]: https://github.com/turicas/rows/blob/develop/examples/cli/extract-pdf.sh +[example-br-cities]: https://github.com/turicas/rows/blob/master/examples/library/brazilian_cities_wikipedia.py +[example-extract-links]: 
https://github.com/turicas/rows/blob/master/examples/library/extract_links.py +[example-legislators]: https://github.com/turicas/rows/blob/master/examples/library/usa_legislators.py +[example-organizaciones]: https://github.com/turicas/rows/blob/master/examples/library/organizaciones.py +[example-pdf-cli]: https://github.com/turicas/rows/blob/master/examples/cli/extract-pdf.sh +[example-radiodifusoras]: https://github.com/turicas/rows/blob/master/examples/library/ecuador_radiodifusoras.py +[example-slip-opinions]: https://github.com/turicas/rows/blob/master/examples/library/slip_opinions.py +[examples]: https://github.com/turicas/rows/tree/master/examples/library +[plugins-source]: https://github.com/turicas/rows/tree/master/rows/plugins +[reference-csv]: reference/plugins/plugin_csv.html +[reference-dicts]: reference/plugins/dicts.html +[reference-html]: reference/plugins/plugin_html.html +[reference-json]: reference/plugins/plugin_json.html +[reference-ods]: reference/plugins/ods.html +[reference-parquet]: reference/plugins/plugin_parquet.html +[reference-pdf]: reference/plugins/plugin_pdf.html +[reference-postgresql]: reference/plugins/postgresql.html +[reference-sqlite]: reference/plugins/sqlite.html +[reference-txt]: reference/plugins/txt.html +[reference-xls]: reference/plugins/xls.html +[reference-xlsx]: reference/plugins/xlsx.html +[reference-xpath]: reference/plugins/xpath.html +[rows-lazyness]: https://github.com/turicas/rows/issues/45 +[section-csv]: #csv +[section-dicts]: #list-of-dicts +[section-html]: #html +[section-json]: #json +[section-ods]: #ods +[section-parquet]: #parquet +[section-pdf]: #pdf +[section-postgresql]: #postgresql +[section-sqlite]: #sqlite +[section-txt]: #txt +[section-xls]: #xls +[section-xlsx]: #xlsx +[section-xpath]: #xpath diff --git a/docs/quick-start.md b/docs/quick-start.md index 684003d4..0a847f4d 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -2,8 +2,9 @@ ## Programatically creating a `Table` object -`rows` can import data from any of the supported formats and will return a -`Table` object for you, but you can also create a `Table` object by hand. +`rows` can import data from any of the supported formats (using +`rows.import_from_X` functions) and will return a `Table` object for you, but +you can also create a `Table` object by hand. ### Using `Table.append` @@ -11,196 +12,185 @@ from collections import OrderedDict from rows import fields, Table -my_fields = OrderedDict([('name', fields.TextField), - ('age', fields.IntegerField),]) -table = Table(fields=my_fields) -table.append({'name': 'Álvaro Justen', 'age': 30}) -table.append({'name': 'Another Guy', 'age': 42}) +# Create a schema for the new table (check also all the available field types +# inside `rows.fields`). +country_fields = OrderedDict([ + ("name", fields.TextField), + ("population", fields.IntegerField), +]) + +# Data from: +countries = Table(fields=country_fields) +countries.append({"name": "Argentina", "population": "45101781"}) +countries.append({"name": "Brazil", "population": "212392717"}) +countries.append({"name": "Colombia", "population": "49849818"}) +countries.append({"name": "Ecuador", "population": "17100444"}) +countries.append({"name": "Peru", "population": "32933835"}) ``` -Check also all the available field types inside `rows.fields`. 
+Then you can iterate over it:
+
+```python
+for country in countries:
+    print(country)
+# Result:
+# Row(name='Argentina', population=45101781)
+# Row(name='Brazil', population=212392717)
+# Row(name='Colombia', population=49849818)
+# Row(name='Ecuador', population=17100444)
+# Row(name='Peru', population=32933835)
+# "Row" is a namedtuple created from `country_fields`
+
+# We added population as a string, but the library automatically converted it
+# to integer, so we can also sum:
+countries_population = sum(country.population for country in countries)
+print(countries_population)  # prints 357378595
+```
 
-### From a `list` of `dict`s
-
-A common use case is to have a `list` of `dict`s -- you can also import it, and
-`rows` will automatically fill in the blanks (your `dict`s don't need to have
-the same keys) and convert data:
+You could also export this table to CSV or any other supported format:
 
 ```python
 import rows
-
-data = [{'name': 'Álvaro Justen', 'age': 30},
-        {'name': 'Another Guy', 'age': 42},]
-table = rows.import_from_dicts(data)
+rows.export_to_csv(countries, "some-LA-countries.csv")
 ```
 
-In this case, `table.fields` will be created automatically (`rows` will
-identify the field type for each `dict` key).
-
-
-## Iterating over a `Table`
-
-You can iterate over a `Table` object and each returned object will be a
-`namedtuple` where you can access row's data, like this:
+If you already had this file, you could import it back:
 
 ```python
-def print_person(person):
-    print('{} is {} years old.'.format(person.name, person.age))
-
-
-for person in table:
-    # namedtuples are returned for each row
-    print_person(person)
-```
-
-The result:
+import rows
 
-```text
-Álvaro Justen is 30 years old.
-Another Guy is 42 years old.
+countries = rows.import_from_csv("some-LA-countries.csv")
+for country in countries:
+    print(country)
+# And the result will be the same.
+
+# Since the library has an automatic type detector, the "population" column
+# will be detected and converted to integer. Let's see the detected types:
+print(countries.fields)
+# Result:
+# OrderedDict([
+#     ('name', <class 'rows.fields.TextField'>),
+#     ('population', <class 'rows.fields.IntegerField'>)
+# ])
 ```
 
-## Automatic type detection/convertion
+### From a `list` of `dict`s
 
-`rows` will automatically identify data type for each column and converts it
-for you. For example:
+If you already have the data in a list of dictionaries, you can simply use
+`rows.import_from_dicts`:
 
 ```python
-table.append({'name': '...', 'age': ''})
-print_person(table[-1])  # yes, you can index it!
-```
-
-And the output:
+import rows
 
-```text
-... is None years old.
+data = [
+    {"name": "Argentina", "population": "45101781"},
+    {"name": "Brazil", "population": "212392717"},
+    {"name": "Colombia", "population": "49849818"},
+    {"name": "Ecuador", "population": "17100444"},
+    {"name": "Peru", "population": "32933835"},
+    {"name": "Guyana", },  # Missing "population" will be filled with `None`
+]
+table = rows.import_from_dicts(data)
+print(table[-1])  # Can use indexes
+# Result:
+# Row(name='Guyana', population=None)
 ```
 
-## Importing Data
+## Importing from other formats
 
-`rows` will help you importing data: its plugins will do the hard job of
-parsing each supported file format so you don't need to. They can help you
-exporting data also. For example, let's download a CSV from the Web and import
-it:
+`rows`' ability to import data is amazing: its plugins will do the hard job of
+parsing the file format so you don't need to. They can also help you export
+data.
+For example, let's download a CSV from the Web and import it:
 
 ```python
 import requests
 import rows
 from io import BytesIO
 
-url = 'http://unitedstates.sunlightfoundation.com/legislators/legislators.csv'
+url = "http://unitedstates.sunlightfoundation.com/legislators/legislators.csv"
 csv = requests.get(url).content  # Download CSV data
 legislators = rows.import_from_csv(BytesIO(csv))  # already imported!
 
-print('Hey, rows automatically identified the types:')
+print("rows automatically identified the types:")
 for field_name, field_type in legislators.fields.items():
-    print('{} is {}'.format(field_name, field_type))
+    print(f"{field_name} is {field_type}")
 ```
 
 And you'll see something like this:
 
 ```text
 [...]
-in_office is <class 'rows.fields.BoolField'>
-gender is <class 'rows.fields.TextField'>
 [...]
+govtrack_id is <class 'rows.fields.IntegerField'>
+[...]
+birthdate is <class 'rows.fields.DateField'>
+[...]
 ```
 
+> Note that **native Python objects** are returned for each row inside a
+> `namedtuple`! The library recognizes each field type and converts it
+> *automagically* no matter which plugin you're using to import the data.
+
 We can then work on this data:
 
 ```python
 women = sum(1 for row in legislators if row.in_office and row.gender == 'F')
 men = sum(1 for row in legislators if row.in_office and row.gender == 'M')
-print('Women vs Men (in office): {} vs {}.'.format(women, men))
-```
-
-Then you'll see effects of our sexist society:
-
-```text
-Women vs Men: 108 vs 432.
+print(f"Women vs Men (in office): {women} vs {men}.")
+# Result:
+# Women vs Men (in office): 108 vs 432.
 ```
 
-Now, let's compare ages:
+Since `birthdate` is automatically detected and converted to a
+`rows.fields.DateField` we can do some quick analysis:
 
 ```python
-legislators.order_by('birthdate')
+legislators.order_by("birthdate")
 older, younger = legislators[-1], legislators[0]
-print('{}, {} is older than {}, {}.'.format(
-    older.lastname, older.firstname, younger.lastname, younger.firstname))
-```
-
-The output:
-
-```text
-Stefanik, Elise is older than Byrd, Robert.
+print(f"{older.lastname}, {older.firstname} is older than {younger.lastname}, {younger.firstname}.")
+# Result:
+# Stefanik, Elise is older than Byrd, Robert.
 ```
 
 You can also get a whole column, like this:
 
 ```python
->>> legislators['gender']
-['M',
- 'M',
- 'M',
- 'M',
- 'M',
- 'M',
- 'M',
- 'M',
- 'M',
- 'M',
- 'M',
- 'M',
- 'M',
- 'M',
- 'F',
- 'M',
- ...]
+print(legislators["gender"])
+# Result (a list of strings):
+# ['M', 'M', 'M', 'M', 'M', 'M', ..., 'M', 'M', 'F']
 ```
 
 And change the whole column (or add a new one):
 
 ```python
->>> legislators['gender'] = ['male' if gender == 'M' else 'female'
-                            for gender in legislators['gender']]
->>> legislators['gender']
-['male',
- 'male',
 'male',
- 'male',
- 'male',
- 'male',
- 'male',
- 'male',
- 'male',
- 'male',
- 'male',
- 'male',
- 'female',
- 'male',
- ...]
+legislators["gender"] = [
+    "male" if gender == "M" else "female"
+    for gender in legislators["gender"]
+]
+print(legislators["gender"])
+# Result:
+# ['male', 'male', 'male', ..., 'male', 'female']
 ```
 
 Or delete it:
 
 ```python
->>> 'gender' in legislators.field_names
-True
->>> del legislators['gender']
->>> 'gender' in legislators.field_names
-False
->>> legislators[0].gender
-[...]
-AttributeError: 'Row' object has no attribute 'gender' +print("gender" in legislators.field_names) +# Result: True +del legislators["gender"] +print("gender" in legislators.field_names) +# Result: False +print(legislators[0].gender) +# Raises the exception: +# AttributeError: 'Row' object has no attribute 'gender' ``` -> Note that **native Python objects** are returned for each row inside a -> `namedtuple`! The library recognizes each field type and converts it -> *automagically* no matter which plugin you're using to import the data. +Exercise: use `rows.import_from_html` to import [population data from +worldometers.com][worldometers-population-table] (tip: you must run +`pip install rows[html]` first to install the needed dependencies). ### Common Parameters @@ -214,7 +204,7 @@ are: detection of types). - `force_types`: a `dict` mapping field names to field types you'd like to force, so `rows` won't try to detect it. Example: - `{'name': rows.fields.TextField, 'age': rows.fields.IntegerField}`. + `{"population": rows.fields.IntegerField}`. - `skip_header`: Ignore header row. Only used if `fields` is not `None`. Default: `True`. - `import_fields`: a `list` with field names to import (other fields will be @@ -231,13 +221,12 @@ If you have a `Table` object you can export it to all available plugins which have the "export" feature. Let's use the HTML plugin: ```python -rows.export_to_html(legislators, 'legislators.html') +rows.export_to_html(legislators, "legislators.html") ``` -And you'll get: +And you'll get a file with the following contents: -```bash -$ head legislators.html +```html @@ -248,16 +237,21 @@ $ head legislators.html +[...] + + + +
lastname name_suffix nickname
 ```
 
 ### Exporting to memory
 
-For some plugins you don't need to specify a filename, so the result will be
-returned for you as a `str`. Example:
+Some plugins don't require a filename to export to, so you can get the result
+as a string, for example:
 
 ```python
-fields_to_export = ('title', 'firstname', 'lastname', 'party')
+fields_to_export = ("title", "firstname", "lastname", "party")
 content = rows.export_to_txt(legislators, export_fields=fields_to_export)
 print(content)
 ```
 
@@ -281,10 +275,28 @@ The result will be:
 +-------+-------------+--------------------+-------+
 
-The plugins `csv`, `json` and `html` will have the same behaviour.
+The plugins `csv`, `json` and `html` have this behaviour (there's a short
+sketch of this at the end of this guide).
 
-#### Using file-objects
+For file-oriented formats it makes sense to return the data itself, but some
+plugins return different objects; on `sqlite` the returned object is a
+`sqlite3.Connection`, see:
+
+```python
+connection = rows.export_to_sqlite(legislators, ":memory:")
+query = "SELECT firstname, lastname FROM table1 WHERE birthdate > '1980-01-01'"
+print(list(connection.execute(query).fetchall()))
+```
+
+You'll get the following output:
+
+```text
+[('Darren', 'Soto'), ('Adam', 'Kinzinger'), ('Ron', 'DeSantis'), (...)]
+```
+
+
+#### Using file and connection objects
 
 The majority of plugins also accept file-objects instead of filenames (for
 importing and also for exporting), for example:
 
@@ -304,24 +316,10 @@ The following text will be printed:
 b"title,firstname,lastname,party\r\nSen,Robert,Byrd,D\r\nRep,Ralph,Hall,R[...]"
 ```
 
-On `sqlite` plugin the returned object is a `sqlite3.Connection`:
-
-```python
-connection = rows.export_to_sqlite(legislators, ':memory:')
-query = 'SELECT firstname, lastname FROM table1 WHERE birthdate > 1980-01-01'
-connection = rows.export_to_sqlite(legislators, ':memory:')
-print(list(connection.execute(query).fetchall()))
-```
-
-You'll get the following output:
-
-```text
-[('Darren', 'Soto'), ('Adam', 'Kinzinger'), ('Ron', 'DeSantis'), (...)]
-```
-
-And you can use `sqlite3.Connection` when importing, too:
+The same happens for `sqlite3.Connection` objects when importing:
 
 ```python
+# Reuses the `connection` and `query` variables from the previous section's example
 table = rows.import_from_sqlite(connection, query=query)
 print(rows.export_to_txt(table))
 ```
 
@@ -359,11 +357,11 @@ The following output will be printed:
 +-----------+-----------------+
 
-
 ## Learn more
 
 Now you have finished the quickstart guide. See the [examples][rows-examples]
 folder for more examples.
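+A minimal sketch of the in-memory export mentioned earlier, assuming the `csv`
+and `json` plugins follow the same "no filename given" convention as the
+`export_to_txt` call shown above (the exact return type -- `str` or `bytes` --
+depends on the plugin):
+
+```python
+# No filename or file-object passed: the serialized data is returned directly
+csv_content = rows.export_to_csv(legislators)
+json_content = rows.export_to_json(legislators)
+print(csv_content[:40])
+print(json_content[:40])
+```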
-[rows-examples]: https://github.com/turicas/rows/tree/develop/examples +[rows-examples]: https://github.com/turicas/rows/tree/master/examples +[worldometers-population-table]: http://www.worldometers.info/world-population/population-by-country/ diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 00000000..5b9ab9b1 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,2 @@ +site_name: rows 0.4.1 documentation +theme: readthedocs diff --git a/requirements-development.txt b/requirements-development.txt index 82c49a9f..f9e02f0b 100644 --- a/requirements-development.txt +++ b/requirements-development.txt @@ -28,5 +28,7 @@ tox yanc # Doc tools -sphinx==1.5 -recommonmark +click-man +ghp-import +mkdocs +pycco diff --git a/rows/__init__.py b/rows/__init__.py index c6bfbfba..fd52e4c8 100644 --- a/rows/__init__.py +++ b/rows/__init__.py @@ -75,4 +75,4 @@ import_from_pdf = plugins.pdf.import_from_pdf -__version__ = "0.4.0" +__version__ = "0.4.1" diff --git a/rows/fields.py b/rows/fields.py index ac01fd17..ed80565c 100644 --- a/rows/fields.py +++ b/rows/fields.py @@ -200,10 +200,10 @@ def deserialize(cls, value, *args, **kwargs): raise ValueError("It's float, not integer") else: value = new_value - elif isinstance(value, six.text_type) and value.startswith("0"): - raise ValueError("It's string, not integer") value = as_string(value) + if value != "0" and value.startswith("0"): + raise ValueError("It's string, not integer") return int(value) if SHOULD_NOT_USE_LOCALE else locale.atoi(value) diff --git a/rows/table.py b/rows/table.py index c55c9ada..8c3d4875 100644 --- a/rows/table.py +++ b/rows/table.py @@ -61,7 +61,9 @@ def field_types(self): def name(self): """Define table name based on its metadata (filename used on import) - If `filename` is not available, return `table1`.""" + If `filename` is not available, return `table1`. + """ + from rows.plugins import utils # TODO: may try read meta['name'] also (some plugins may set it) diff --git a/rows/utils.py b/rows/utils.py index 0c2ff865..2a637ced 100644 --- a/rows/utils.py +++ b/rows/utils.py @@ -864,6 +864,7 @@ def pgexport( Required: psql command """ + if isinstance(dialect, six.text_type): dialect = csv.get_dialect(dialect) @@ -910,7 +911,8 @@ def generate_schema(table, export_fields, output_format, output_fobj): Current supported output formats: 'txt', 'sql' and 'django'. The table name and all fields names pass for a slugifying process (table - name is taken from file name).""" + name is taken from file name). 
+ """ if output_format == "txt": from rows.plugins.dicts import import_from_dicts @@ -953,7 +955,7 @@ def generate_schema(table, export_fields, output_format, output_fobj): CREATE TABLE IF NOT EXISTS {name} ( {fields} ); - """ + """ ) .strip() .format(name=table.name, fields=",\n".join(fields)) diff --git a/setup.py b/setup.py index 2613704f..502dba5a 100644 --- a/setup.py +++ b/setup.py @@ -52,7 +52,7 @@ "A common, beautiful interface to tabular data, " "no matter the format" ), long_description=LONG_DESCRIPTION, - version="0.4.0", + version="0.4.1", author="Álvaro Justen", author_email="alvarojusten@gmail.com", url="https://github.com/turicas/rows/", diff --git a/tests/tests_fields.py b/tests/tests_fields.py index 365439ce..d5ca6027 100644 --- a/tests/tests_fields.py +++ b/tests/tests_fields.py @@ -149,6 +149,8 @@ def test_IntegerField(self): with self.assertRaises(ValueError): fields.IntegerField.deserialize("013") + self.assertEqual(fields.IntegerField.deserialize("0"), 0) + def test_FloatField(self): self.assertEqual(fields.FloatField.TYPE, (float,)) self.assertEqual(fields.FloatField.serialize(None), "")
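A minimal sketch (illustration only, not part of the patch) of the
`IntegerField` behaviour that the `rows/fields.py` change and the new test
above encode: a value with a leading zero is kept as text, while a plain `"0"`
still deserializes to an integer.

```python
from rows import fields

print(fields.IntegerField.deserialize("0"))   # 0 -- now accepted as an integer
print(fields.IntegerField.deserialize("42"))  # 42

try:
    fields.IntegerField.deserialize("013")    # leading zero: treated as text
except ValueError as error:
    print(error)  # It's string, not integer
```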