pre-commit · raddessi · Nov 14, 2019 · Nov 14, 2019 · Nov 14, 2019 · Feb 13, 2021
diff --git a/identify/extensions.py b/identify/extensions.py
@@ -52,9 +52,10 @@
     'gif': {'binary', 'image', 'gif'},
     'go': {'text', 'go'},
     'gotmpl': {'text', 'gotmpl'},
+    'gpg': {'text', 'gnupg'},
     'gpx': {'text', 'gpx', 'xml'},
-    'graphql': {'text', 'graphql'},
     'gradle': {'text', 'groovy'},
+    'graphql': {'text', 'graphql'},
     'groovy': {'text', 'groovy'},
     'gyb': {'text', 'gyb'},
     'gyp': {'text', 'gyp', 'python'},
@@ -100,6 +101,7 @@
     'lr': {'text', 'lektor'},
     'lua': {'text', 'lua'},
     'm': {'text', 'c', 'objective-c'},
+    'mako': {'text', 'mako'},
     'manifest': {'text', 'manifest'},
     'map': {'text', 'map'},
     'markdown': {'text', 'markdown'},
@@ -179,6 +181,7 @@
     'tgz': {'binary', 'gzip'},
     'thrift': {'text', 'thrift'},
     'tiff': {'binary', 'image', 'tiff'},
+    'tmpl': {'text', 'cheetah'},
     'toml': {'text', 'toml'},
     'ts': {'text', 'ts'},
     'tsx': {'text', 'tsx'},
@@ -223,6 +226,15 @@
 EXTENSIONS_NEED_BINARY_CHECK = {
     'plist': {'plist'},
 }
+# Maps a file extension to {shebang interpreter name: EXTENSIONS key}; names
+# missing from the inner map are looked up in EXTENSIONS unchanged.
+EXTENSIONS_NEED_SHEBANG_CHECK = {
+    'sls': {
+        'pydsl': 'py',
+        'pyobjects': 'py',
+        'cheetah': 'tmpl',
+    },
+}
 
 NAMES = {
     '.babelrc': EXTENSIONS['json'] | {'babelrc'},

diff --git a/identify/identify.py b/identify/identify.py
@@ -60,6 +60,8 @@ def tags_from_path(path):
             if len(shebang) > 0:
                 tags.update(tags_from_interpreter(shebang[0]))
 
+    tags.update(tags_from_extension_specific_shebang(path))
+
     # some extensions can be both binary and text
     # see EXTENSIONS_NEED_BINARY_CHECK
     if not {TEXT, BINARY} & tags:
@@ -73,6 +75,42 @@ def tags_from_path(path):
     return tags
 
 
+def tags_from_extension_specific_shebang(path):
+    """Return tags for a file whose extension needs a shebang lookup."""
+    _, filename = os.path.split(path)
+    _, ext = os.path.splitext(filename)
+    ret = set()
+    if ext.lstrip('.') not in extensions.EXTENSIONS_NEED_SHEBANG_CHECK:
+        return ret  # extension not listed: empty set, file not opened
+
+    interpreter_to_extension_map = extensions.EXTENSIONS_NEED_SHEBANG_CHECK[
+        ext.lstrip('.')
+    ]
+
+    with open(path, 'rb') as f:
+        shebang = parse_shebang(f)
+
+    if ext == '.sls':  # NOTE(review): other mapped extensions add no tags
+        if shebang:
+            # tag by the first '|'-separated interpreter, via its extension
+            try:
+                first_interpreter = shebang[0].split('|')[0]
+                ret.update(
+                    extensions.EXTENSIONS[
+                        interpreter_to_extension_map.get(
+                            first_interpreter, first_interpreter,
+                        )
+                    ],
+                )
+            except (IndexError, KeyError):  # no EXTENSIONS entry -> no tags
+                pass
+        else:
+            # no shebang: the default interpreter is jinja
+            ret.update(extensions.EXTENSIONS['jinja'])
+
+    return ret
+
+
 def tags_from_filename(filename):
     _, filename = os.path.split(filename)
     _, ext = os.path.splitext(filename)

diff --git a/tests/identify_test.py b/tests/identify_test.py
@@ -118,6 +118,55 @@ def test_tags_from_path_plist_text(tmpdir):
     }
 
 
+def test_tags_from_extension_specific_shebang_executable_file(tmpdir):
+    x = tmpdir.join('test.sls')
+    x.write('')  # empty file, so there is no shebang line to parse
+    make_executable(x.strpath)
+    assert identify.tags_from_extension_specific_shebang(x.strpath) == {
+        'jinja',  # expected fallback when no shebang is present
+        'text',
+    }
+
+
+@pytest.mark.parametrize(
+    ('interpreter', 'expected'),
+    (
+        ('cheetah', {'text', 'cheetah'}),  # mapped to the 'tmpl' extension
+        ('dson', set()),  # set() rows: interpreters expected to add no tags
+        ('genshi', set()),
+        ('gpg', {'text', 'gnupg'}),
+        ('jinja', {'text', 'jinja'}),
+        ('jinja|py', {'text', 'jinja'}),  # only the name before '|' counts
+        ('jinja|yaml', {'text', 'jinja'}),
+        ('jinja|yaml|gpg', {'text', 'jinja'}),
+        ('mako', {'text', 'mako'}),
+        ('py', {'text', 'python'}),
+        ('pydsl', {'text', 'python'}),  # mapped to the 'py' extension
+        ('pyobjects', {'text', 'python'}),  # mapped to the 'py' extension
+        ('wempy', set()),
+        ('yaml', {'text', 'yaml'}),
+        ('yamlex', set()),
+        ('yaml|gpg', {'text', 'yaml'}),
+    ),
+)
+@pytest.mark.parametrize(
+    ('shebang_prefix',),  # with and without a space after '#!'
+    (
+        ('#!',),
+        ('#! ',),
+    ),
+)
+def test_tags_from_extension_specific_shebang(
+    tmpdir,
+    shebang_prefix,
+    interpreter,
+    expected,
+):
+    x = tmpdir.join('test.sls')
+    x.write(shebang_prefix + interpreter)  # shebang only, no newline
+    assert identify.tags_from_extension_specific_shebang(x.strpath) == expected
+
+
 @pytest.mark.parametrize(
     ('filename', 'expected'),
     (