-
Notifications
You must be signed in to change notification settings - Fork 766
feat(macos): implement dyld chained fixups for arm64 #1603
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -422,8 +422,95 @@ def loadMacho(self, depth=0, isdyld=False): | |
| if self.ql.arch.type == QL_ARCH.X8664: | ||
| load_commpage(self.ql) | ||
|
|
||
| if depth == 0 and self.is_driver is False and self.ql.arch.type == QL_ARCH.ARM64: | ||
| self.dyld_chained_fixups() | ||
|
|
||
| return self.proc_entry | ||
|
|
||
|
|
||
def dyld_chained_fixups(self):
    """Apply the image's dyld chained fixups to the already-mapped segments.

    Walks every fixup chain described by ``self.macho_file.chained_fixups``:

    * rebase entries are overwritten in place with their relocated address;
    * bind entries are redirected to a freshly mapped ``[STATIC]`` area that
      holds one 4-byte arm64 ``ret`` stub per bound pointer, and are recorded
      in ``self.import_symbols`` as ``stub_address -> {'ptr', 'name'}``.

    Only ``DYLD_CHAINED_PTR_64`` chains and import table format 1 are
    supported for now. When the image has no chained fixups the method
    returns without touching any state.

    Raises:
        QlErrorMACHOFormat: if a segment with fixups uses another pointer
            format, or if a bind is met while the import table uses a format
            that was not parsed.
    """
    chained_fixups = self.macho_file.chained_fixups
    if chained_fixups is None:
        return

    # Binds can only be resolved against an import table in format 1.
    can_resolve_binds = chained_fixups.header.imports_format == 1
    imports_count = chained_fixups.header.imports_count
    image_base = self.load_address + self.slide

    # address of each bind slot -> name of the symbol it must point to
    all_imports = {}

    for starts_in_seg in chained_fixups.starts_in_segment:
        if starts_in_seg is None:
            continue

        pointer_format = starts_in_seg.pointer_format
        page_starts = starts_in_seg.page_start[:starts_in_seg.page_count]

        for page_idx, start_offset in enumerate(page_starts):
            # 0xFFFF marks a page without any fixup
            # (DYLD_CHAINED_PTR_START_NONE in Apple's fixup-chains.h)
            if start_offset == 0xFFFF:
                continue

            if pointer_format != DYLD_CHAINED_PTR_64:
                raise QlErrorMACHOFormat("Unsupported pointer format in chained fixups: {}".format(pointer_format))

            page_offset = starts_in_seg.segment_offset + page_idx * starts_in_seg.page_size
            chain_cursor_ptr = image_base + page_offset + start_offset

            while True:
                value = self.ql.unpack64(self.ql.mem.read(chain_cursor_ptr, 8))

                # fields shared by dyld_chained_ptr_64_rebase and _bind
                next_stride = (value >> 51) & 0xFFF
                is_bind = (value >> 63) & 0x1 == 1

                if is_bind:
                    if not can_resolve_binds:
                        raise QlErrorMACHOFormat("Cannot resolve binds in chained fixups")

                    # low 24 bits index the import table; the addend field
                    # (bits 24..31) is not applied
                    ordinal = value & 0xFFFFFF
                    if ordinal < imports_count:
                        all_imports[chain_cursor_ptr] = chained_fixups.imports[ordinal].symbol_name
                else:
                    target = value & 0xFFFFFFFFF
                    high8 = (value >> 36) & 0xFF

                    # high8 is the value of the *top* 8 bits of the final
                    # pointer and is applied once the base has been added.
                    # NOTE(review): for DYLD_CHAINED_PTR_64 the header
                    # describes `target` as an unslid vmaddr; confirm that
                    # adding load_address here is intended.
                    corrected_addr = (image_base + target) | (high8 << 56)
                    self.ql.mem.write(chain_cursor_ptr, self.ql.pack64(corrected_addr))

                if next_stride == 0:
                    break

                # the `next` field counts 4-byte units
                chain_cursor_ptr += next_stride * 4

    self.import_symbols = {}
    if not all_imports:
        return

    stub_size = 4  # one arm64 instruction per stub

    self.static_addr = self.vm_end_addr
    self.static_size = self.ql.mem.align_up(len(all_imports) * stub_size)

    self.ql.mem.map(self.static_addr, self.static_size, info="[STATIC]")
    self.vm_end_addr += self.static_size
    self.ql.log.info("Memory for external static symbol is created at 0x%x with size 0x%x",
                     self.static_addr, self.static_size)

    for idx, (fixup_addr, symbol_name) in enumerate(all_imports.items()):
        jump = self.static_addr + idx * stub_size
        self.import_symbols[jump] = {
            'ptr': fixup_addr,
            'name': symbol_name
        }

        self.ql.mem.write(jump, b'\xC0\x03\x5F\xD6')  # ret
        self.ql.mem.write(fixup_addr, self.ql.pack64(jump))
|
|
||
|
|
||
| def loadSegment64(self, cmd, isdyld): | ||
| PAGE_SIZE = 0x1000 | ||
| if isdyld: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -39,6 +39,9 @@ def __init__(self, lc, data): | |
| self.rel_offset = lc.relocations_offset | ||
| self.rel_num = lc.number_of_relocations | ||
| self.flags = lc.flags | ||
| self.reserved1 = lc.reserved1 | ||
| self.reserved2 = lc.reserved2 | ||
| self.reserved3 = lc.reserved3 | ||
| self.content = data[self.offset : self.offset + self.size] | ||
|
|
||
| # def __str__(self): | ||
|
|
@@ -235,3 +238,116 @@ def __init__(self, lc, data): | |
|
|
||
| def __str__(self): | ||
| pass | ||
|
|
||
| class DyldChainedHeader: | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please use the classes available at `qiling.os.struct`.
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thank you for the suggestion to use the classes from `qiling.os.struct`. I noticed that `data.py` currently does not use `qiling.os.struct`. My main concern is consistency: should I refactor the existing code to align with this approach, or should I keep the current implementation style within `data.py`? Furthermore, the current parsing logic operates on file offsets (stream positions) rather than memory pointers. For example, an offset has to be calculated to locate the next structure in the file. A memory-centric utility like `qiling.os.struct` might not be directly applicable, or could introduce unnecessary complexity for file-based parsing. Could you provide further guidance on how best to handle this disparity?
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That is a valid question. I am not sure I am entirely following the other comment about pointers and structures. The base classes available in `qiling.os.struct` […remainder of this sentence was lost in extraction]. If there are more specific questions, hit me up on Telegram; that would be much quicker. |
||
def __init__(self, data):
    '''
    Parse the fixed-size chained fixups header found at the start of `data`.

    struct dyld_chained_header {
        uint32_t fixups_version;
        uint32_t starts_offset;
        uint32_t imports_offset;
        uint32_t symbols_offset;
        uint32_t imports_count;
        uint32_t imports_format;
        uint32_t symbols_format;
    };

    Every field is a little-endian uint32 and is stored on the instance
    under the name it has in the struct. Bytes past the 28-byte header are
    ignored; fewer than 28 bytes raise `struct.error`.
    '''
    # seven consecutive little-endian uint32 fields: decode them in one call
    (self.fixups_version,
     self.starts_offset,
     self.imports_offset,
     self.symbols_offset,
     self.imports_count,
     self.imports_format,
     self.symbols_format) = unpack("<7L", data[:28])
|
|
||
class DyldChainedStartsInImage:
    '''
    Table of per-segment offsets, parsed from the start of `data`.

    Attributes:
        seg_count:       number of entries in the table
        seg_info_offset: list of `seg_count` uint32 offsets, in file order
    '''

    def __init__(self, data):
        '''
        struct dyld_chained_starts_in_image {
            uint32_t seg_count;
            uint32_t seg_info_offset[1];
        };

        `seg_info_offset` is a variable-length array holding `seg_count`
        entries. Raises `struct.error` when `data` is too short to hold it.
        '''
        self.seg_count = unpack("<L", data[0:4])[0]

        # the offsets array follows the count directly; decode it in one call
        table_end = 4 + 4 * self.seg_count
        self.seg_info_offset = list(unpack("<%dL" % self.seg_count, data[4:table_end]))
|
|
||
|
|
||
class DyldChainedStartsInSegment:
    '''
    Chain-start information for one segment, parsed from the start of `data`.

    The fixed fields are stored on the instance under their struct names;
    `page_start` is a list with `page_count` uint16 entries.
    '''

    def __init__(self, data):
        '''
        struct dyld_chained_starts_in_segment {
            uint32_t size;
            uint16_t page_size;
            uint16_t pointer_format;
            uint64_t segment_offset;
            uint32_t max_valid_pointer;
            uint16_t page_count;
            uint16_t page_start[1];
        };

        `page_start` is a variable-length array holding `page_count`
        entries. Raises `struct.error` when `data` is too short.
        '''
        # fixed part: 4 + 2 + 2 + 8 + 4 + 2 = 22 bytes, little-endian, unpadded
        (self.size,
         self.page_size,
         self.pointer_format,
         self.segment_offset,
         self.max_valid_pointer,
         self.page_count) = unpack("<LHHQLH", data[:22])

        # variable part: one uint16 start offset per page
        starts_end = 22 + 2 * self.page_count
        self.page_start = list(unpack("<%dH" % self.page_count, data[22:starts_end]))
|
|
||
class DyldChainedImport:
    '''
    One 4-byte import table entry.

    Attributes:
        lib_ordinal: bits 0..7 of the entry
        weak_import: bit 8 of the entry (0 or 1)
        name_offset: bits 9..31 of the entry
        symbol_name: None after construction; expected to be filled in by
                     whoever owns the symbol string pool
    '''

    def __init__(self, data):
        '''
        struct dyld_chained_import {
            uint32_t lib_ordinal : 8;
            uint32_t weak_import : 1;
            uint32_t name_offset : 23;
        };

        Only the first 4 bytes of `data` are read (little-endian).
        '''
        raw = unpack("<L", data[0:4])[0]

        # explicit shifts keep each field's bit position visible
        self.lib_ordinal = raw & 0xff
        self.weak_import = (raw >> 8) & 0x1
        self.name_offset = (raw >> 9) & 0x7fffff

        self.symbol_name = None  # to be filled later

    def __repr__(self):
        return "DyldChainedImport(lib_ordinal={}, weak_import={}, name_offset={}, symbol_name={!r})".format(
            self.lib_ordinal, self.weak_import, self.name_offset, self.symbol_name)
|
|
||
class ChainedFixups:
    '''
    Parsed payload of a chained fixups load command.

    Attributes:
        offset, size:      location of the payload inside the file, taken
                           from `lc.data_offset` / `lc.data_size`
        content:           the raw payload bytes
        header:            DyldChainedHeader parsed from the payload start
        starts_in_image:   DyldChainedStartsInImage located at
                           `header.starts_offset`
        starts_in_segment: one entry per segment; a
                           DyldChainedStartsInSegment, or None where the
                           segment's info offset is 0
        imports:           DyldChainedImport entries with `symbol_name`
                           resolved; left empty when the import table is not
                           in format 1, the only layout parsed here
    '''

    def __init__(self, lc, data):
        self.offset = lc.data_offset
        self.size = lc.data_size
        self.content = data[self.offset : self.offset + self.size]

        self.header = DyldChainedHeader(self.content)

        starts_base = self.header.starts_offset
        self.starts_in_image = DyldChainedStartsInImage(self.content[starts_base:])

        # per-segment offsets are relative to the starts-in-image structure;
        # an offset of 0 means the segment has no chain-start info
        self.starts_in_segment = [
            DyldChainedStartsInSegment(self.content[starts_base + seg_offset:]) if seg_offset else None
            for seg_offset in self.starts_in_image.seg_info_offset
        ]

        self.imports = []

        # Entries are decoded as 4-byte DyldChainedImport records, which only
        # matches import format 1. Decoding another format that way would
        # yield meaningless name offsets, so such tables are left unparsed.
        if self.header.imports_format != 1:
            return

        symbols_base = self.header.symbols_offset

        for i in range(self.header.imports_count):
            entry_offset = self.header.imports_offset + i * 4
            import_entry = DyldChainedImport(self.content[entry_offset : entry_offset + 4])

            # names are NUL-terminated strings in the symbol pool; search in
            # place rather than copying the rest of the pool for every entry
            name_start = symbols_base + import_entry.name_offset
            name_end = self.content.find(b'\0', name_start)
            if name_end == -1:
                name_end = len(self.content)

            import_entry.symbol_name = self.content[name_start:name_end].decode()
            self.imports.append(import_entry)
|
|
||
|
|
||
|
|
||
| # def __str__(self): | ||
| # return (" ChainedFixupsInfo: content {}".format(self.content)) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Writing `is False` or `is True` is not conventional. Consider just using `if self.is_driver` or `if not self.is_driver`.