Skip to content

Commit 7e3e6a8

Browse files
authored
fix: resolve 'bytes' object has no attribute 'decoded' for large CODEOWNERS files (#379)
Fixes #378

## What

Normalize CODEOWNERS file contents to bytes before processing, so both the normal path (Contents object with .decoded) and the large-file path (raw bytes from blob().decode_content()) use the same type downstream.

## Why

When a CODEOWNERS file is large enough that the GitHub API returns null content, the code fetches it via blob().decode_content(), which returns raw bytes. The code then called .decoded on that bytes object, crashing with "'bytes' object has no attribute 'decoded'" and preventing PR creation for user removal.

## Notes

- The isinstance check in get_usernames_from_codeowners handles both bytes lines and str lines, since the function now accepts raw bytes or str directly.
- Existing tests were updated to pass bytes/str directly instead of mocking .decoded, matching the new contract.

Signed-off-by: jmeridth <jmeridth@gmail.com>
1 parent 4597504 commit 7e3e6a8

2 files changed

Lines changed: 24 additions & 12 deletions

File tree

cleanowners.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -133,12 +133,14 @@ def main(): # pragma: no cover
133133

134134
if codeowners_file_contents.content is None:
135135
# This is a large file so we need to get the sha and download based off the sha
136-
codeowners_file_contents = repo.blob(
136+
codeowners_decoded = repo.blob(
137137
repo.file_contents(codeowners_filepath).sha
138138
).decode_content()
139+
else:
140+
codeowners_decoded = codeowners_file_contents.decoded
139141

140142
# Extract the usernames from the CODEOWNERS file
141-
usernames = get_usernames_from_codeowners(codeowners_file_contents)
143+
usernames = get_usernames_from_codeowners(codeowners_decoded)
142144

143145
usernames_to_remove = []
144146
codeowners_file_contents_new = None
@@ -160,10 +162,8 @@ def main(): # pragma: no cover
160162
# Remove that username from the codeowners_file_contents
161163
file_changed = True
162164
bytes_username = f"@{username}".encode("ASCII")
163-
codeowners_file_contents_new = (
164-
codeowners_file_contents.decoded.replace(
165-
bytes_username, b""
166-
)
165+
codeowners_file_contents_new = codeowners_decoded.replace(
166+
bytes_username, b""
167167
)
168168

169169
# Store the repo and users to remove for reporting later
@@ -291,9 +291,9 @@ def get_repos_iterator(organization, repository_list, github_connection):
291291
def get_usernames_from_codeowners(codeowners_file_contents, ignore_teams=True):
292292
"""Extract the usernames from the CODEOWNERS file"""
293293
usernames = []
294-
for line in codeowners_file_contents.decoded.splitlines():
294+
for line in codeowners_file_contents.splitlines():
295295
if line:
296-
line = line.decode()
296+
line = line.decode() if isinstance(line, bytes) else line
297297
# skip comments
298298
if line.lstrip().startswith("#"):
299299
continue

test_cleanowners.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,7 @@ class TestGetUsernamesFromCodeowners(unittest.TestCase):
103103

104104
def test_get_usernames_from_codeowners_ignore_teams(self):
105105
"""Test the get_usernames_from_codeowners function."""
106-
codeowners_file_contents = MagicMock()
107-
codeowners_file_contents.decoded = """
106+
codeowners_file_contents = """
108107
# Comment
109108
@user1
110109
@user2
@@ -120,8 +119,7 @@ def test_get_usernames_from_codeowners_ignore_teams(self):
120119

121120
def test_get_usernames_from_codeowners_with_teams(self):
122121
"""Test the get_usernames_from_codeowners function."""
123-
codeowners_file_contents = MagicMock()
124-
codeowners_file_contents.decoded = """
122+
codeowners_file_contents = """
125123
# Comment
126124
@user1
127125
@user2
@@ -135,6 +133,20 @@ def test_get_usernames_from_codeowners_with_teams(self):
135133

136134
self.assertEqual(result, expected_usernames)
137135

136+
def test_get_usernames_from_codeowners_with_raw_bytes(self):
137+
"""Test that get_usernames_from_codeowners works with raw bytes (large file path).
138+
139+
Regression test for https://github.com/github-community-projects/cleanowners/issues/378
140+
When a CODEOWNERS file is large, blob().decode_content() returns raw bytes
141+
instead of a Contents object with a .decoded attribute.
142+
"""
143+
codeowners_file_contents = b"* @user1 @user2\ndocs/* @user3\n"
144+
expected_usernames = ["user1", "user2", "user3"]
145+
146+
result = get_usernames_from_codeowners(codeowners_file_contents)
147+
148+
self.assertEqual(result, expected_usernames)
149+
138150

139151
class TestGetOrganization(unittest.TestCase):
140152
"""Test the get_org function in cleanowners.py"""

0 commit comments

Comments
 (0)