Skip to content

Commit bd72190

Browse files
committed
Implement CLI
1 parent 0b16a80 commit bd72190

File tree

7 files changed

+469
-9
lines changed

7 files changed

+469
-9
lines changed

README.rst

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,13 @@ Iterate large directories efficiently with python.
77
About
88
=====
99

10-
``python-getdents`` is a simple wrapper around Linux system call ``getdents64`` (see ``man getdents`` for details). `Here's <http://be-n.com/spw/you-can-list-a-million-files-in-a-directory-but-not-with-ls.html>`_ some study on why ``ls``, ``os.listdir()`` and others are so slow when dealing with extremely large directories.
11-
10+
``python-getdents`` is a simple wrapper around the Linux system call ``getdents64`` (see ``man getdents`` for details). `More details <http://be-n.com/spw/you-can-list-a-million-files-in-a-directory-but-not-with-ls.html>`_ on the approach.
1211

1312
TODO
1413
====
1514

1615
* Verify that implementation works on platforms other than ``x86_64``.
1716

18-
1917
Install
2018
=======
2119

@@ -30,7 +28,7 @@ For development
3028
3129
python3 -m venv env
3230
. env/bin/activate
33-
pip install -e .
31+
pip install -e .[test]
3432
3533
Run tests
3634
=========
@@ -83,3 +81,56 @@ Advanced
8381
)
8482
8583
os.close(fd)
84+
85+
CLI
86+
---
87+
88+
Usage
89+
~~~~~
90+
91+
::
92+
93+
python-getdents [-h] [-b N] [-o NAME] PATH
94+
95+
Options
96+
~~~~~~~
97+
98+
+--------------------------+-------------------------------------------------+
99+
| Option | Description |
100+
+==========================+=================================================+
101+
| ``-b N`` | Buffer size (in bytes) to allocate when |
102+
| | iterating over directory. Default is 32768, the |
103+
| | same value used by glibc, you probably want to |
104+
+--------------------------+ increase this value. Try starting with 16777216 |
105+
| ``--buffer-size N`` | (16 MiB). Best performance is achieved when |
106+
| | buffer size rounds to size of the file system |
107+
| | block. |
108+
+--------------------------+-------------------------------------------------+
109+
| ``-o NAME`` | Output format: |
110+
| | |
111+
| | * ``plain`` (default) Print only names. |
112+
| | * ``csv`` Print as comma-separated values in |
113+
+--------------------------+ order: inode, type, name. |
114+
| ``--output-format NAME`` | * ``csv-headers`` Same as ``csv``, but print |
115+
| | headers on the first line also. |
116+
| | * ``json`` output as JSON array. |
117+
| | * ``json-stream`` output each directory entry |
118+
| | as single json object separated by newline. |
119+
+--------------------------+-------------------------------------------------+
120+
121+
Exit codes
122+
~~~~~~~~~~
123+
124+
* 3 - Requested buffer is too large.
125+
* 4 - ``PATH`` not found.
126+
* 5 - ``PATH`` is not a directory.
127+
* 6 - Not enough permissions to read contents of the ``PATH``.
* 7 - Any other OS error while reading the ``PATH``.
128+
129+
Examples
130+
~~~~~~~~
131+
132+
.. code-block:: sh
133+
134+
python-getdents /path/to/large/dir
135+
python -m getdents /path/to/large/dir
136+
python-getdents /path/to/large/dir -o csv -b 16777216 > dir.csv

getdents/__main__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from sys import exit
2+
3+
from . import __name__ as prog
4+
from .cli import main
5+
6+
7+
if __name__ == '__main__':  # pragma: no cover
    # Executed via ``python -m getdents``: run the CLI under the package
    # name and hand its return value to ``sys.exit`` as the exit status.
    status = main(prog=prog)
    exit(status)

getdents/cli.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
from argparse import ArgumentParser
2+
from sys import stderr
3+
4+
from . import MIN_GETDENTS_BUFF_SIZE, getdents
5+
from .formatters import FORMATTERS
6+
7+
8+
def parse_args(args, prog):
    """Parse command-line arguments of the directory listing CLI.

    :param args: Argument strings handed to ``ArgumentParser.parse_args``.
    :param prog: Program name used by the parser in usage/error output.
    :returns: Tuple of ``(path, buffer size, formatter callable)``.

    A buffer size below ``MIN_GETDENTS_BUFF_SIZE`` is reported through
    ``parser.error``, which terminates the process.
    """
    buffer_help = (
        'Buffer size (in bytes) to allocate when iterating over directory'
    )
    format_help = 'Output format: %s' % ', '.join(sorted(FORMATTERS))

    parser = ArgumentParser(prog=prog, description='Print directory contents.')
    parser.add_argument('path', metavar='PATH')
    parser.add_argument(
        '-b', '--buffer-size',
        metavar='N',
        type=int,
        default=32768,
        help=buffer_help,
    )
    parser.add_argument(
        '-o', '--output-format',
        metavar='NAME',
        default='plain',
        choices=list(FORMATTERS),
        help=format_help,
    )

    namespace = parser.parse_args(args)

    if namespace.buffer_size < MIN_GETDENTS_BUFF_SIZE:
        parser.error('Minimum buffer size is %s' % MIN_GETDENTS_BUFF_SIZE)

    selected_formatter = FORMATTERS[namespace.output_format]

    return namespace.path, namespace.buffer_size, selected_formatter
39+
40+
41+
def main(args=None, prog=None):
    """Run the CLI: list ``PATH`` through the selected formatter.

    :param args: Argument strings forwarded to ``parse_args``.
    :param prog: Program name forwarded to ``parse_args``.
    :returns: Exit status. ``0`` on success, ``3`` on ``MemoryError``,
        ``4`` on ``FileNotFoundError``, ``5`` on ``NotADirectoryError``,
        ``6`` on ``PermissionError`` and ``7`` on any other ``OSError``.
    """
    path, buff_size, fmt = parse_args(args, prog)

    # Checked in order; the first matching class decides the exit status.
    os_error_statuses = (
        (FileNotFoundError, 4),
        (NotADirectoryError, 5),
        (PermissionError, 6),
    )

    try:
        fmt(getdents(path, buff_size=buff_size))
    except MemoryError:
        print(
            'Not enough memory to allocate', buff_size, 'bytes of data',
            file=stderr,
        )
        return 3
    except OSError as error:
        print(error, file=stderr)

        for error_type, status in os_error_statuses:
            if isinstance(error, error_type):
                return status

        # Any OSError that is not one of the specific classes above.
        return 7

    return 0

getdents/formatters.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
from csv import writer as csv_writer
2+
from itertools import chain
3+
from json import dumps as json_dumps
4+
from sys import stdout
5+
6+
from ._getdents import (
7+
DT_BLK,
8+
DT_CHR,
9+
DT_DIR,
10+
DT_FIFO,
11+
DT_LNK,
12+
DT_REG,
13+
DT_SOCK,
14+
DT_UNKNOWN,
15+
)
16+
17+
18+
# Column names, in output order; written as the first row by the CSV
# formatter when headers are requested.
HEADER = ('inode', 'type', 'name')

# Output format name -> formatter callable; filled in by ``@formatter``.
FORMATTERS = {}

# ``DT_*`` entry type constant -> short type name used in CSV/JSON output.
TYPE_NAMES = {
    DT_BLK: 'blk',
    DT_CHR: 'chr',
    DT_DIR: 'dir',
    DT_FIFO: 'fifo',
    DT_LNK: 'lnk',
    DT_REG: 'reg',
    DT_SOCK: 'sock',
    DT_UNKNOWN: 'unknown',
}
30+
31+
32+
def formatter(name, registry=FORMATTERS):
    """Build a decorator that registers a function under *name*.

    :param name: Key to store the decorated function under.
    :param registry: Mapping that receives the function; defaults to the
        module-level ``FORMATTERS``.
    :returns: Decorator that records the function and returns it unchanged.
    """
    def register(fn):
        registry[name] = fn
        return fn

    return register
37+
38+
39+
@formatter('plain')
def format_plain(directory_entries, file=stdout):
    """Print only the name of every directory entry, one per line.

    :param directory_entries: Iterable of ``(inode, type, name)`` tuples.
    :param file: Stream to print to.
    """
    for _inode, _type, entry_name in directory_entries:
        print(entry_name, file=file)
43+
44+
45+
class Echo:
    """Minimal file-like object whose ``write`` hands its argument back.

    Passed to ``csv.writer`` in ``format_csv`` so that ``writerow`` returns
    the formatted line instead of writing it anywhere.
    """

    def write(self, value):
        """Return *value* unchanged."""
        return value
48+
49+
50+
@formatter('csv')
def format_csv(directory_entries, file=stdout, headers=False):
    """Print directory entries as CSV rows: inode, type name, name.

    :param directory_entries: Iterable of ``(inode, type, name)`` tuples.
        Any iterable is accepted; it is consumed exactly once.
    :param file: Stream to print to.
    :param headers: When true, print ``HEADER`` as the first row.
    :raises KeyError: If an entry type is not present in ``TYPE_NAMES``.

    Nothing at all is printed (not even the header row) when there are no
    entries.
    """
    # Take a single iterator up front so re-iterable inputs (lists, tuples)
    # are not restarted, which would repeat the header and duplicate rows.
    entries = iter(directory_entries)

    try:
        first = next(entries)
    except StopIteration:
        return

    # ``Echo.write`` returns its argument, so ``writerow`` yields the
    # formatted line (including the line terminator) instead of writing it.
    writer = csv_writer(Echo())

    if headers:
        print(writer.writerow(HEADER), end='', file=file)

    for inode, type_, name in chain((first,), entries):
        print(
            writer.writerow((inode, TYPE_NAMES[type_], name)),
            end='', file=file,
        )
63+
64+
65+
@formatter('csv-headers')
def format_csv_headers(directory_entries, file=stdout):
    """Print entries as CSV, preceded by the header row.

    Delegates to ``format_csv`` with ``headers`` switched on.
    """
    result = format_csv(directory_entries, file=file, headers=True)
    return result
68+
69+
70+
def json_encode(inode, type, name):
    """Serialize one directory entry as a JSON object string.

    :param inode: Value stored under the ``inode`` key.
    :param type: Entry type constant; translated through ``TYPE_NAMES``.
    :param name: Value stored under the ``name`` key.
    :returns: JSON text with keys ``inode``, ``type`` and ``name``.
    :raises KeyError: If *type* is not present in ``TYPE_NAMES``.
    """
    record = {
        'inode': inode,
        'type': TYPE_NAMES[type],
        'name': name,
    }
    return json_dumps(record)
76+
77+
78+
@formatter('json')
def format_json(directory_entries, file=stdout):
    """Print directory entries as a JSON array, one object per line.

    :param directory_entries: Iterable of ``(inode, type, name)`` tuples.
        Any iterable is accepted; it is traversed exactly once.
    :param file: Stream to print to.

    The opening bracket is always written, so an empty input produces a
    valid (empty) JSON array rather than a lone closing bracket.
    """
    print('[', end='', file=file)

    # The first object follows the bracket on a new line; every later one
    # is additionally preceded by a comma.
    separator = '\n'

    for inode, type_, name in directory_entries:
        print(
            separator, json_encode(inode, type_, name),
            sep='', end='', file=file,
        )
        separator = ',\n'

    print('\n]', file=file)
93+
94+
95+
@formatter('json-stream')
def format_json_stream(directory_entries, file=stdout):
    """Print every directory entry as its own JSON object on its own line.

    :param directory_entries: Iterable of ``(inode, type, name)`` tuples.
    :param file: Stream to print to.
    """
    for entry_inode, entry_type, entry_name in directory_entries:
        encoded = json_encode(entry_inode, entry_type, entry_name)
        print(encoded, file=file)

setup.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
#!/usr/bin/env python
22

3-
from distutils.core import Extension
3+
from setuptools import Extension, find_packages, setup
44

5-
from setuptools import setup
65

6+
tests_require = ['pytest', 'pretend']
77

88
setup(
99
name='getdents',
10-
version='0.2',
10+
version='0.3',
1111
description='Python binding to linux syscall getdents64.',
1212
long_description=open('README.rst').read(),
1313
classifiers=[
@@ -22,12 +22,18 @@
2222
author_email='[email protected]',
2323
url='http://github.com/ZipFile/python-getdents',
2424
license='BSD-2-Clause',
25-
packages=['getdents'],
25+
packages=find_packages(exclude=['tests']),
2626
include_package_data=True,
2727
zip_safe=False,
28+
extras_require={
29+
'test': tests_require,
30+
},
2831
ext_modules=[
2932
Extension('getdents._getdents', sources=['getdents/_getdents.c']),
3033
],
34+
entry_points = {
35+
'console_scripts': ['python-getdents=getdents.cli:main'],
36+
},
3137
setup_requires=['pytest-runner'],
32-
tests_require=['pytest'],
38+
tests_require=tests_require,
3339
)

tests/test_cli.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import re
2+
3+
import pretend
4+
5+
from pytest import mark, raises
6+
7+
import getdents.cli as cli
8+
from getdents.cli import main, parse_args
9+
from getdents.formatters import (
10+
FORMATTERS,
11+
format_csv,
12+
format_json,
13+
format_plain,
14+
)
15+
16+
17+
@mark.parametrize(['args', 'expected'], [
    (['/tmp'], ('/tmp', 32768, format_plain)),
    (['-b', '1234', 'x', '-o', 'json'], ('x', 1234, format_json)),
    (
        ['--buffer-size', '9999', '--output-format', 'csv', 'xxx'],
        ('xxx', 9999, format_csv),
    ),
])
def test_parse_args(args, expected):
    """Defaults, short options and long options all parse as expected."""
    parsed = parse_args(args, 'test')

    assert parsed == expected
28+
29+
30+
def test_parse_args_min_buff_size(capsys):
    """A too-small buffer size exits and reports the minimum on stderr."""
    with raises(SystemExit):
        parse_args(['test', '-b', '0'], 'test')

    _, captured_err = capsys.readouterr()
    match = re.search(r'Minimum buffer size is \d+', captured_err)

    assert match is not None
37+
38+
39+
def test_main(monkeypatch):
    """``main`` passes the ``getdents`` result to the chosen formatter."""
    entries = pretend.stub()

    # Recording stand-ins for the formatter and for ``getdents``.
    format_test = pretend.call_recorder(lambda directory_entries: None)
    getdents = pretend.call_recorder(
        lambda path, buff_size=32768: entries
    )

    monkeypatch.setitem(FORMATTERS, 'test', format_test)
    monkeypatch.setattr(cli, 'getdents', getdents)

    status = main(['x', '-o', 'test', '-b', '1024'], 'test')

    assert status == 0
    assert getdents.calls == [pretend.call('x', buff_size=1024)]
    assert format_test.calls == [pretend.call(entries)]
56+
57+
58+
def test_main_memory_error(monkeypatch):
    """``MemoryError`` from ``getdents`` maps to exit status 3."""
    failing_getdents = pretend.raiser(MemoryError)
    monkeypatch.setattr(cli, 'getdents', failing_getdents)

    status = main(['x'])

    assert status == 3
62+
63+
64+
def test_main_file_not_found_error(monkeypatch):
    """``FileNotFoundError`` from ``getdents`` maps to exit status 4."""
    failing_getdents = pretend.raiser(FileNotFoundError)
    monkeypatch.setattr(cli, 'getdents', failing_getdents)

    status = main(['x'])

    assert status == 4
68+
69+
70+
def test_main_not_a_directory_error(monkeypatch):
    """``NotADirectoryError`` from ``getdents`` maps to exit status 5."""
    failing_getdents = pretend.raiser(NotADirectoryError)
    monkeypatch.setattr(cli, 'getdents', failing_getdents)

    status = main(['x'])

    assert status == 5
74+
75+
76+
def test_main_permission_error(monkeypatch):
    """``PermissionError`` from ``getdents`` maps to exit status 6."""
    failing_getdents = pretend.raiser(PermissionError)
    monkeypatch.setattr(cli, 'getdents', failing_getdents)

    status = main(['x'])

    assert status == 6
80+
81+
82+
def test_main_os_error(monkeypatch):
    """A plain ``OSError`` from ``getdents`` maps to exit status 7."""
    failing_getdents = pretend.raiser(OSError)
    monkeypatch.setattr(cli, 'getdents', failing_getdents)

    status = main(['x'])

    assert status == 7

0 commit comments

Comments
 (0)