Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 88 additions & 1 deletion qiling/loader/macho.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,8 +422,95 @@ def loadMacho(self, depth=0, isdyld=False):
if self.ql.arch.type == QL_ARCH.X8664:
load_commpage(self.ql)

if depth == 0 and self.is_driver is False and self.ql.arch.type == QL_ARCH.ARM64:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Writing `is False` or `is True` is not conventional.
Consider just using `if self.is_driver` or `if not self.is_driver`.

self.dyld_chained_fixups()

return self.proc_entry


def dyld_chained_fixups(self):
    """Apply the image's chained fixups and collect its bound imports.

    Walks every fixup chain described by ``self.macho_file.chained_fixups``:

    * rebase entries are rewritten in place with the relocated address;
    * bind entries are redirected to a 4-byte stub (a single arm64 ``ret``)
      placed in a freshly mapped ``[STATIC]`` region. The mapping
      ``stub address -> {'ptr': fixup address, 'name': symbol name}`` is
      published as ``self.import_symbols``.

    Only arm64 images using ``DYLD_CHAINED_PTR_64`` pointers and imports
    format 1 are supported for now.

    Raises:
        QlErrorMACHOFormat: on an unsupported pointer format, or when a bind
            entry is met while the imports format is not supported.
    """
    # Published unconditionally: consumers look addresses up in this dict even
    # when the image carries no chained fixups at all.
    self.import_symbols = {}

    chained_fixups = getattr(self.macho_file, 'chained_fixups', None)
    if chained_fixups is None:
        return

    image_base = self.load_address + self.slide
    can_resolve_binds = chained_fixups.header.imports_format == 1  # Only support format 1 for now
    all_imports = {}

    for starts_in_seg in chained_fixups.starts_in_segment:
        if starts_in_seg is None:
            continue

        pointer_format = starts_in_seg.pointer_format

        for page_idx in range(starts_in_seg.page_count):
            start_offset = starts_in_seg.page_start[page_idx]

            # 0xFFFF marks a page without any fixup
            if start_offset == 0xFFFF:
                continue

            page_file_offset = starts_in_seg.segment_offset + (page_idx * starts_in_seg.page_size)
            chain_cursor_ptr = image_base + page_file_offset + start_offset

            while True:
                if pointer_format != DYLD_CHAINED_PTR_64:
                    raise QlErrorMACHOFormat("Unsupported pointer format in chained fixups: {}".format(pointer_format))

                value = self.ql.mem.read_ptr(chain_cursor_ptr, 8)

                # fields shared by rebase and bind entries
                next_stride = (value >> 51) & 0xFFF
                is_bind = bool(value >> 63)

                if is_bind:
                    if not can_resolve_binds:
                        raise QlErrorMACHOFormat("Cannot resolve binds in chained fixups")

                    ordinal = value & 0xFFFFFF

                    # out-of-range ordinals are left untouched
                    if ordinal < chained_fixups.header.imports_count:
                        all_imports[chain_cursor_ptr] = chained_fixups.imports[ordinal].symbol_name
                else:
                    target = value & 0xFFFFFFFFF
                    high8 = (value >> 36) & 0xFF

                    # high8 carries the top byte of the final pointer, hence the
                    # shift by 56 rather than by its bit-field position (36).
                    # NOTE(review): fixup-chains.h describes `target` as a vmaddr
                    # for DYLD_CHAINED_PTR_64; it is treated here as an offset
                    # from the image base - confirm.
                    corrected_addr = (image_base + target) | (high8 << 56)
                    self.ql.mem.write_ptr(chain_cursor_ptr, corrected_addr, 8)

                # a zero stride terminates the chain; strides are in 4-byte units
                if next_stride == 0:
                    break

                chain_cursor_ptr += next_stride * 4

    if not all_imports:
        return

    # one 4-byte stub per imported symbol, placed right after the image
    self.static_addr = self.vm_end_addr
    self.static_size = self.ql.mem.align_up(len(all_imports) * 4)

    self.ql.mem.map(self.static_addr, self.static_size, info="[STATIC]")
    self.vm_end_addr += self.static_size
    self.ql.log.info("Memory for external static symbol is created at 0x%x with size 0x%x",
                     self.static_addr, self.static_size)

    for idx, (fixup_addr, symbol_name) in enumerate(all_imports.items()):
        jump = self.static_addr + idx * 4

        self.import_symbols[jump] = {
            'ptr': fixup_addr,
            'name': symbol_name
        }

        self.ql.mem.write(jump, b'\xC0\x03\x5F\xD6')  # ret
        self.ql.mem.write_ptr(fixup_addr, jump, 8)


def loadSegment64(self, cmd, isdyld):
PAGE_SIZE = 0x1000
if isdyld:
Expand Down
27 changes: 26 additions & 1 deletion qiling/loader/macho_parser/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,29 @@
# UNIXTHREAD
X86_THREAD_STATE32 = 0x00000001
X86_THREAD_STATE64 = 0x00000004
ARM_THREAD_STATE64 = 0x00000006
ARM_THREAD_STATE64 = 0x00000006


# Section type constants.
# NOTE(review): SECTION_TYPE is presumably the mask isolating the type bits of
# a section's flags word - confirm against <mach-o/loader.h>.
SECTION_TYPE = 0x000000ff

# S_NON_LAZY_SYMBOL_POINTERS - Section with non-lazy symbol pointers.
S_NON_LAZY_SYMBOL_POINTERS = 0x06

# S_LAZY_SYMBOL_POINTERS - Section with lazy symbol pointers.
S_LAZY_SYMBOL_POINTERS = 0x07

# Special indirect symbol values.
# NOTE(review): presumably markers for indirect symbol table entries that do
# not carry a real symbol index - TODO confirm.
INDIRECT_SYMBOL_ABS = 0x40000000
INDIRECT_SYMBOL_LOCAL = 0x80000000

# Pointer Formats
# (values of the `pointer_format` field of dyld_chained_starts_in_segment)
DYLD_CHAINED_PTR_ARM64E = 1
DYLD_CHAINED_PTR_64 = 2
DYLD_CHAINED_PTR_32 = 3
DYLD_CHAINED_PTR_32_CACHE = 4
DYLD_CHAINED_PTR_32_FIRMWARE = 5
DYLD_CHAINED_PTR_64_OFFSET = 6
DYLD_CHAINED_PTR_ARM64E_OFFSET = 7 # aka KERNEL
DYLD_CHAINED_PTR_64_KERNEL_CACHE = 8
DYLD_CHAINED_PTR_ARM64E_USERLAND24 = 9
DYLD_CHAINED_PTR_ARM64E_SHARED_CACHE = 10
DYLD_CHAINED_PTR_X86_64_KERNEL_CACHE = 11
116 changes: 116 additions & 0 deletions qiling/loader/macho_parser/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ def __init__(self, lc, data):
self.rel_offset = lc.relocations_offset
self.rel_num = lc.number_of_relocations
self.flags = lc.flags
self.reserved1 = lc.reserved1
self.reserved2 = lc.reserved2
self.reserved3 = lc.reserved3
self.content = data[self.offset : self.offset + self.size]

# def __str__(self):
Expand Down Expand Up @@ -235,3 +238,116 @@ def __init__(self, lc, data):

def __str__(self):
pass

class DyldChainedHeader:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use the classes available at qiling.os.struct to work with "C structures".
It is heavily documented and there are plenty of examples around the code.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the suggestion to use the classes from qiling.os.struct.

I noticed that data.py currently does not utilize qiling.os.struct. My main concern is about consistency: Should I refactor the existing code to align with this approach, or should I maintain the current implementation style within data.py?

Furthermore, the current parsing logic operates using file offsets/pointers (or stream positions) rather than memory pointers. For example, calculating the offset is necessary to locate the next structure in the file. Using a memory-centric utility like qiling.os.struct might not be directly applicable or could introduce unnecessary complexity for file-based parsing.

Could you provide further guidance on how to best handle this disparity?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is a valid question.
Everything around MacOS emulation kind of froze in time and did not get refactored alongside the other components. This is why you can still see such relics of old practices. You can browse through the Linux / ELF files to get a general idea of what fresh code should look like.

I am not sure I am entirely following on the other comment about pointers and structures. The base classes available in os.struct are very powerful in terms of code readability and efficiency. You may use them in the exact same way as you would initialize a C structure and write it entirely to memory, or read data from memory and "cast" it into a C structure, making its members easily readable.

If there are more specific questions, hit me up on Telegram, that would be much quicker.

def __init__(self, data):
    '''
    Decode the fixed-size header located at the very start of `data`.

    struct dyld_chained_header {
        uint32_t fixups_version;
        uint32_t starts_offset;
        uint32_t imports_offset;
        uint32_t symbols_offset;
        uint32_t imports_count;
        uint32_t imports_format;
        uint32_t symbols_format;
    };
    '''
    # seven consecutive little-endian uint32 fields, 28 bytes in total
    (
        self.fixups_version,
        self.starts_offset,
        self.imports_offset,
        self.symbols_offset,
        self.imports_count,
        self.imports_format,
        self.symbols_format,
    ) = unpack("<7L", data[0:28])

class DyldChainedStartsInImage:
    '''
    Table holding one offset per segment.

    struct dyld_chained_starts_in_image {
        uint32_t seg_count;
        uint32_t seg_info_offset[1];
    };
    '''

    def __init__(self, data):
        '''
        Decode the structure located at the very start of `data`.

        Fills `seg_count` and `seg_info_offset` (a list of `seg_count` ints).
        Raises struct.error if `data` is too short.
        '''
        self.seg_count = unpack("<L", data[0:4])[0]

        # seg_info_offset is a variable-length array: seg_count uint32 values
        # stored right after the count, decoded here in a single call
        table_end = 4 + 4 * self.seg_count
        self.seg_info_offset = list(unpack("<%dL" % self.seg_count, data[4:table_end]))


class DyldChainedStartsInSegment:
    '''
    Per-segment record listing where the fixup chain starts in each page.

    struct dyld_chained_starts_in_segment {
        uint32_t size;
        uint16_t page_size;
        uint16_t pointer_format;
        uint64_t segment_offset;
        uint32_t max_valid_pointer;
        uint16_t page_count;
        uint16_t page_start[1];
    };
    '''

    def __init__(self, data):
        '''
        Decode the structure located at the very start of `data`.

        Fills one attribute per structure member; `page_start` is a list of
        `page_count` ints. Raises struct.error if `data` is too short.
        '''
        # fixed part: 4 + 2 + 2 + 8 + 4 + 2 = 22 bytes, packed without padding
        (
            self.size,
            self.page_size,
            self.pointer_format,
            self.segment_offset,
            self.max_valid_pointer,
            self.page_count,
        ) = unpack("<LHHQLH", data[0:22])

        # page_start is a variable-length array: page_count uint16 values
        # stored right after the fixed part, decoded here in a single call
        table_end = 22 + 2 * self.page_count
        self.page_start = list(unpack("<%dH" % self.page_count, data[22:table_end]))

class DyldChainedImport:
    '''
    A single 32-bit import entry.

    struct dyld_chained_import {
        uint32_t lib_ordinal : 8;
        uint32_t weak_import : 1;
        uint32_t name_offset : 23;
    };
    '''

    def __init__(self, data):
        '''
        Split the little-endian word at the start of `data` into its bit-fields.
        '''
        word = unpack("<L", data[0:4])[0]

        self.lib_ordinal = word & 0xff              # bits 0..7
        self.weak_import = (word >> 8) & 0x1        # bit 8
        self.name_offset = (word >> 9) & 0x7fffff   # bits 9..31

        # to be filled later
        self.symbol_name = None

class ChainedFixups:
    '''
    Parsed payload of a LC_DYLD_CHAINED_FIXUPS load command.

    Attributes:
        offset, size      : location of the payload within the binary
        content           : the raw payload bytes
        header            : DyldChainedHeader
        starts_in_image   : DyldChainedStartsInImage
        starts_in_segment : one DyldChainedStartsInSegment per segment, or
                            None for a segment whose info offset is 0
        imports           : list of DyldChainedImport with `symbol_name`
                            resolved; left empty when the imports format is
                            not supported
    '''

    # the only imports format whose entries match DyldChainedImport
    _SUPPORTED_IMPORTS_FORMAT = 1

    def __init__(self, lc, data):
        '''
        lc   : load command providing `data_offset` and `data_size`
        data : the whole binary, as bytes
        '''
        self.offset = lc.data_offset
        self.size = lc.data_size
        self.content = data[self.offset : self.offset + self.size]

        self.header = DyldChainedHeader(self.content)

        # segment info offsets are relative to the starts-in-image structure
        starts_base = self.header.starts_offset
        self.starts_in_image = DyldChainedStartsInImage(self.content[starts_base:])

        self.starts_in_segment = [
            DyldChainedStartsInSegment(self.content[starts_base + seg_offset:]) if seg_offset else None
            for seg_offset in self.starts_in_image.seg_info_offset
        ]

        self.imports = []

        # Entries of the other imports formats do not follow the 4-byte
        # DyldChainedImport layout (see Apple's fixup-chains.h). Decoding them
        # anyway yields bogus name offsets, and in turn garbage names or a
        # UnicodeDecodeError, so they are skipped.
        if self.header.imports_format != self._SUPPORTED_IMPORTS_FORMAT:
            return

        symbol_pool = self.content[self.header.symbols_offset:]

        for idx in range(self.header.imports_count):
            entry_offset = self.header.imports_offset + idx * 4
            import_entry = DyldChainedImport(self.content[entry_offset : entry_offset + 4])

            # names are NUL-terminated strings within the symbols pool
            import_entry.symbol_name = symbol_pool[import_entry.name_offset :].split(b'\0', 1)[0].decode()
            self.imports.append(import_entry)



# def __str__(self):
# return (" ChainedFixupsInfo: content {}".format(self.content))
12 changes: 10 additions & 2 deletions qiling/loader/macho_parser/loadcommand.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ def get_complete(self):
LC_DYLD_CHAINED_FIXUPS : LoadDyldChainedFixups,
LC_RPATH : LoadRPath,
LC_ID_DYLIB : LoadIdDylib,
LC_BUILD_VERSION : LoadBuildVersion
LC_BUILD_VERSION : LoadBuildVersion,
LC_DYLD_CHAINED_FIXUPS : LoadDyldChainedFixups,
}

exec_func = cmd_map.get(self.cmd_id)
Expand Down Expand Up @@ -537,4 +538,11 @@ def __init__(self, data):
self.current_version = unpack("<L", self.FR.read(4))[0]
self.compatibility_version = unpack("<L", self.FR.read(4))[0]
self.FR.setOffset(self.name_offset)
self.name = self.FR.readString(4)
self.name = self.FR.readString(4)

class LoadDyldChainedFixups(LoadCommand):
    '''
    LC_DYLD_CHAINED_FIXUPS load command: after the common part handled by
    LoadCommand, reads two little-endian uint32 values, `data_offset` and
    `data_size`, in that order.
    '''

    def __init__(self, data):
        super().__init__(data)

        for field in ("data_offset", "data_size"):
            setattr(self, field, unpack("<L", self.FR.read(4))[0])
2 changes: 2 additions & 0 deletions qiling/loader/macho_parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ def parseData(self):
self.seg_split_info = SegmentSplitInfo(command, self.binary_file)
elif command.cmd_id == LC_DYSYMTAB:
self.dysymbol_table = DySymbolTable(command, self.binary_file)
elif command.cmd_id == LC_DYLD_CHAINED_FIXUPS:
self.chained_fixups = ChainedFixups(command, self.binary_file)
return True

@staticmethod
Expand Down
32 changes: 29 additions & 3 deletions qiling/os/macos/macos.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,24 @@
#

from ctypes import sizeof
from typing import Dict, TextIO, Union, Callable, IO, List, Optional

from unicorn import UcError
from unicorn.x86_const import UC_X86_INS_SYSCALL

from qiling import Qiling
from qiling.arch.x86_utils import GDTManager, SegmentManager64
from qiling.cc import intel
from qiling.const import QL_ARCH, QL_OS, QL_VERBOSE
from qiling.const import QL_ARCH, QL_OS, QL_VERBOSE, QL_INTERCEPT
from qiling.os.fcall import QlFunctionCall
from qiling.os.posix.posix import QlOsPosix
from qiling.os.macos.events.macos import QlMacOSEvManager
from qiling.os.macos.events.macos_policy import QlMacOSPolicy
from qiling.os.macos.events.macos_structs import mac_policy_list_t
from qiling.os.macos.structs import kmod_info_t, POINTER64
from qiling.os.posix.syscall.abi import arm

from qiling.exception import QlErrorSyscallError, QlErrorSyscallNotFound, QlMemoryMappedError
from qiling.os.os import QlOs

class QlOsMacos(QlOsPosix):
type = QL_OS.MACOS
Expand Down Expand Up @@ -164,7 +166,31 @@ def load(self):

self.ql.hook_insn(self.hook_syscall, UC_X86_INS_SYSCALL)


self.ql.hook_code(self.hook_imports)

def hook_imports(self, ql: Qiling, address: int, size: int):
    """Code hook dispatching import stubs to user-defined API handlers.

    Does nothing unless `address` is a key of the loader's `import_symbols`
    mapping. Otherwise the handler registered for the symbol name under
    QL_INTERCEPT.CALL is invoked as `handler(ql, address, api_name)`.

    Args:
        ql      : the Qiling instance
        address : address of the code about to be executed
        size    : size of that code (unused)

    Raises:
        QlErrorSyscallError: the handler raised; the original exception is
            logged and chained as the cause.
        QlErrorSyscallNotFound: no handler is registered and `ql.debug_stop`
            is set.
    """
    # the loader does not publish `import_symbols` for every image; treat a
    # missing mapping the same as an empty one instead of failing the hook
    import_symbols = getattr(ql.loader, 'import_symbols', None)
    if not import_symbols:
        return

    entry = import_symbols.get(address)
    if entry is None:
        return

    api_name = entry['name']
    api_func = self.user_defined_api[QL_INTERCEPT.CALL].get(api_name)

    if not api_func:
        ql.log.warning(f'api {api_name} is not implemented')

        if ql.debug_stop:
            raise QlErrorSyscallNotFound("Macos API implementation not found")

        return

    try:
        api_func(ql, address, api_name)
    except Exception as ex:
        ql.log.exception(ex)
        ql.log.debug("%s Exception Found" % api_name)

        raise QlErrorSyscallError("Macos API Implementation Error") from ex

def set_api(self, target: str, handler: Callable, intercept: QL_INTERCEPT = QL_INTERCEPT.CALL):
    """Register `handler` for the API named `target`.

    Calls `QlOs.set_api` directly, naming the base class explicitly instead
    of going through `super()`.
    """
    QlOs.set_api(
        self,
        target,
        handler,
        intercept,
    )

def hook_syscall(self, ql, intno = None):
return self.load_syscall()

Expand Down