-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparse_8086_machine_code.nim
More file actions
370 lines (307 loc) · 12.6 KB
/
parse_8086_machine_code.nim
File metadata and controls
370 lines (307 loc) · 12.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
import std/[bitops, sequtils, strformat, strutils, sugar, tables]
# Debug print on/off: when true, the decoder echoes its intermediate state
# (matched bytes, parsed data/displacement, and each rendered instruction).
const VERBOSE: bool = true
# Type Definitions
type
  # Kind of a single field inside an instruction format.
  InstrFieldKind = enum
    Bits_Literal    # fixed opcode bits that must match the input bits
    Bits_MOD        # addressing mode field
    Bits_REG        # register field
    Bits_RM         # register/memory field
    Bits_D          # direction flag
    Bits_W          # byte/word flag
    Bits_S          # sign-extension flag
    Bits_HasData    # flag: immediate data follows the instruction bytes
    Bits_HasDataW   # flag: the immediate may be 16 bits wide (when w = 1, s = 0)
    Bits_HasAddr    # flag: a 16-bit address follows the instruction bytes
    Bits_HasLabel   # flag: an 8-bit instruction-pointer increment follows

  # One field of an instruction format. `nBits` is the number of bits read
  # from the byte stream; with `nBits == 0` the field carries the preset
  # `value` instead of reading input.
  InstrField = object
    kind: InstrFieldKind
    nBits: uint8
    value: uint8

  # Bit layout of one instruction encoding: mnemonic (`kind`), a description
  # (`dscr`), the number of bytes covered by `fields`, and the fields in
  # stream order.
  InstrFormat = object
    kind: string
    dscr: string
    nBytes: uint8
    fields: seq[InstrField]

  # Raised when the input bytes do not match an instruction format.
  DecodeError = object of CatchableError
# Constants
const
  # Register names: outer key is the 3-bit register code, inner key is the
  # W flag (0 = byte register, 1 = word register).
  REGISTER = {
    0'u16: {0'u16: "AL", 1'u16: "AX"}.toTable,
    1'u16: {0'u16: "CL", 1'u16: "CX"}.toTable,
    2'u16: {0'u16: "DL", 1'u16: "DX"}.toTable,
    3'u16: {0'u16: "BL", 1'u16: "BX"}.toTable,
    4'u16: {0'u16: "AH", 1'u16: "SP"}.toTable,
    5'u16: {0'u16: "CH", 1'u16: "BP"}.toTable,
    6'u16: {0'u16: "DH", 1'u16: "SI"}.toTable,
    7'u16: {0'u16: "BH", 1'u16: "DI"}.toTable,
  }.toTable

  # Effective-address templates for MOD = 00, keyed by the R/M code.
  # R/M = 110 is the direct-address case; the disassembler renders that one
  # itself instead of using the "[data]" placeholder.
  MOD00 = {
    0'u16: "[BX + SI]",
    1'u16: "[BX + DI]",
    2'u16: "[BP + SI]",
    3'u16: "[BP + DI]",
    4'u16: "[SI]",
    5'u16: "[DI]",
    6'u16: "[data]",
    7'u16: "[BX]",
  }.toTable

  # Effective-address templates for MOD = 01; "D8" is replaced by the
  # 8-bit displacement when the operand is rendered.
  MOD01 = {
    0'u16: "[BX + SI + D8]",
    1'u16: "[BX + DI + D8]",
    2'u16: "[BP + SI + D8]",
    3'u16: "[BP + DI + D8]",
    4'u16: "[SI + D8]",
    5'u16: "[DI + D8]",
    6'u16: "[BP + D8]",
    7'u16: "[BX + D8]",
  }.toTable

  # Effective-address templates for MOD = 10; "D16" is replaced by the
  # 16-bit displacement when the operand is rendered.
  MOD10 = {
    0'u16: "[BX + SI + D16]",
    1'u16: "[BX + DI + D16]",
    2'u16: "[BP + SI + D16]",
    3'u16: "[BP + DI + D16]",
    4'u16: "[SI + D16]",
    5'u16: "[DI + D16]",
    6'u16: "[BP + D16]",
    7'u16: "[BX + D16]",
  }.toTable
# Instruction Definition Helpers

# Literal opcode bits: `n` bits of the stream that must equal `value`.
proc B(n: uint8, value: uint8): InstrField =
  result = InstrField(kind: Bits_Literal, nBits: n, value: value)

# Fields that are read from the byte stream (non-zero bit width).
template D(): InstrField =
  InstrField(kind: Bits_D, nBits: 1)
template W(): InstrField =
  InstrField(kind: Bits_W, nBits: 1)
template S(): InstrField =
  InstrField(kind: Bits_S, nBits: 1)
template MOD(): InstrField =
  InstrField(kind: Bits_MOD, nBits: 2)
template REG(): InstrField =
  InstrField(kind: Bits_REG, nBits: 3)
template RM(): InstrField =
  InstrField(kind: Bits_RM, nBits: 3)

# Zero-width marker fields: they consume no bits and only set a flag (value 1)
# telling the disassembler which trailing bytes to expect.
template DATA(): InstrField =
  InstrField(kind: Bits_HasData, value: 1)
template DATAW(): InstrField =
  InstrField(kind: Bits_HasDataW, value: 1)
template ADDR(): InstrField =
  InstrField(kind: Bits_HasAddr, value: 1)
template IP_INC8(): InstrField =
  InstrField(kind: Bits_HasLabel, value: 1)

# Zero-width fields that force the D / W flag to a fixed value for formats
# that do not encode the flag in the instruction bytes.
proc setD(value: uint8): InstrField =
  result = InstrField(kind: Bits_D, value: value)
proc setW(value: uint8): InstrField =
  result = InstrField(kind: Bits_W, value: value)

# Bundles a mnemonic, its fixed byte count, its field layout and a
# human-readable description into an `InstrFormat`.
proc instr(kind: string, nBytes: uint8, fields: seq[InstrField], dscr: string): InstrFormat =
  result = InstrFormat(kind: kind, dscr: dscr, nBytes: nBytes, fields: fields)
# Instruction Definitions
#
# Each entry lists the mnemonic, the number of bytes covered by the bit
# fields, the fields in stream order, and a description. Formats are tried in
# the order given here, so the bit fields of one entry must add up to exactly
# `nBytes * 8` bits; otherwise a field is sliced outside its byte.
const instructions = @[
  # MOVs:
  instr("mov", 2, @[B(6, 0b100010), D, W, MOD, REG, RM], "reg/mem to/from reg"),
  instr("mov", 2, @[B(7, 0b1100011), W, MOD, B(3, 0b000), RM, DATA, DATAW], "imd to reg/mem"),
  instr("mov", 1, @[B(4, 0b1011), W, REG, DATA, DATAW, setD(1)], "imd to reg"),
  instr("mov", 1, @[B(7, 0b1010000), W, ADDR, setD(1)], "mem to acc"),
  instr("mov", 1, @[B(7, 0b1010001), W, ADDR], "acc to mem"),
  # instr("mov", 1, @[B(8, 0b10001110), MOD, 0, SR, RM], "reg/mem to segment reg"),
  # instr("mov", 1, @[B(8, 0b10001100), MOD, 0, SR, RM], "segment reg to reg/mem"),
  # PUSHs
  instr("push", 2, @[B(8, 0b11111111), MOD, B(3, 0b110), RM, setW(1)], "reg/mem"),
  instr("push", 1, @[B(5, 0b01010), REG, setW(1)], "reg"),
  # NOTE: "push seg reg" is disabled. Its encoding is `000 sr 110` with a
  # 2-bit segment-register field, but the previous pattern used the 3-bit REG
  # field, which made it 9 bits wide inside a 1-byte format. The overflowing
  # literal was then compared against the wrong bits and could match
  # unrelated opcodes (e.g. 0b00000011, an `add`) before their own entries
  # were tried. Re-enable once a 2-bit segment-register field (SR) exists:
  # instr("push", 1, @[B(3, 0b000), SR, B(3, 0b110), setW(1)], "seg reg"),
  # ADDs:
  instr("add", 2, @[B(6, 0b000000), D, W, MOD, REG, RM], "reg/mem with reg to either"),
  instr("add", 2, @[B(6, 0b100000), S, W, MOD, B(3, 0b000), RM, DATA, DATAW], "imd to r/m"),
  instr("add", 1, @[B(7, 0b0000010), W, DATA, DATAW, setD(1)], "imd to acc"),
  # SUBs:
  instr("sub", 2, @[B(6, 0b001010), D, W, MOD, REG, RM], "reg/mem and reg from either"),
  instr("sub", 2, @[B(6, 0b100000), S, W, MOD, B(3, 0b101), RM, DATA, DATAW], "imd from r/m"),
  instr("sub", 1, @[B(7, 0b0010110), W, DATA, DATAW, setD(1)], "imd from acc"),
  # CMPs:
  instr("cmp", 2, @[B(6, 0b001110), D, W, MOD, REG, RM], "reg/mem and reg"),
  instr("cmp", 2, @[B(6, 0b100000), S, W, MOD, B(3, 0b111), RM, DATA, DATAW], "imd with r/m"),
  # NOTE(gilles): added DATAW even though the manual specifies data to be 8 bits only
  instr("cmp", 1, @[B(7, 0b0011110), W, DATA, DATAW, setD(1)], "imd with acc"),
  # JMPs:
  instr("je", 1, @[B(8, 0b01110100), IP_INC8], "jmp on equal/zero"),
  instr("jl", 1, @[B(8, 0b01111100), IP_INC8], "jmp on less/not greater or equal"),
  instr("jle", 1, @[B(8, 0b01111110), IP_INC8], "jmp on less or equal/not greater"),
  instr("jb", 1, @[B(8, 0b01110010), IP_INC8], "jmp on below/not above or equal"),
  instr("jbe", 1, @[B(8, 0b01110110), IP_INC8], "jmp on below or equal/not above"),
  instr("jp", 1, @[B(8, 0b01111010), IP_INC8], "jmp on parity/parity even"),
  instr("jo", 1, @[B(8, 0b01110000), IP_INC8], "jmp on overflow"),
  instr("js", 1, @[B(8, 0b01111000), IP_INC8], "jmp on sign"),
  instr("jne", 1, @[B(8, 0b01110101), IP_INC8], "jmp on not equal/not zero"),
  instr("jnl", 1, @[B(8, 0b01111101), IP_INC8], "jmp on not less/greater or equal"),
  instr("jnle", 1, @[B(8, 0b01111111), IP_INC8], "jmp on not less or equal/greater"),
  instr("jnb", 1, @[B(8, 0b01110011), IP_INC8], "jmp on not below/above or equal"),
  instr("jnbe", 1, @[B(8, 0b01110111), IP_INC8], "jmp on not below or equal/above"),
  instr("jnp", 1, @[B(8, 0b01111011), IP_INC8], "jmp on not par/par odd"),
  instr("jno", 1, @[B(8, 0b01110001), IP_INC8], "jmp on not overflow"),
  instr("jns", 1, @[B(8, 0b01111001), IP_INC8], "jmp on not sign"),
  instr("loop", 1, @[B(8, 0b11100010), IP_INC8], "loop CX times"),
  instr("loopz", 1, @[B(8, 0b11100001), IP_INC8], "loop while zero/equal"),
  instr("loopnz", 1, @[B(8, 0b11100000), IP_INC8], "loop while not zero/equal"),
  instr("jcxz", 1, @[B(8, 0b11100011), IP_INC8], "jmp on CX zero"),
]
proc formatInstruction(s: string): string =
  ## Normalises a rendered instruction string:
  ## * lower-cases it (ASCII only; mnemonics and register names are ASCII),
  ## * rewrites a negative displacement "+ -N" as "- N",
  ## * drops a zero displacement " + 0".
  ##
  ## Uses `toLowerAscii`, the lower-casing routine `std/strutils` provides for
  ## strings, rather than `toLower`.
  result = s.toLowerAscii
  result = result.replace("+ -", "- ")
  result = result.replace(" + 0", "")
proc concatTwoBytes(low, high: byte): int16 =
  ## Joins two bytes into one 16-bit value: `high` supplies bits 15..8 and
  ## `low` supplies bits 7..0. The result is signed, so a set top bit in
  ## `high` yields a negative number.
  let upperPart = int16(high) shl 8
  let lowerPart = int16(low)
  result = upperPart or lowerPart
proc extendSign(b: byte): int16 =
  ## Sign-extends an 8-bit value to 16 bits: bit 7 of `b` is copied into the
  ## upper byte, so 0x80..0xFF map to -128..-1 and 0x00..0x7F stay unchanged.
  # Reinterpreting the byte as int8 and widening performs the sign extension.
  let signedByte = cast[int8](b)
  result = int16(signedByte)
proc parseInstructionFields(bytes: seq[byte], instr: InstrFormat): array[InstrFieldKind, uint16] =
  ## Matches `bytes` against the bit layout of `instr` and extracts its fields.
  ##
  ## Fields are consumed most-significant bit first. Literal fields must equal
  ## the corresponding input bits; every other field with a non-zero width is
  ## read from the input, and zero-width fields contribute their preset value.
  ##
  ## Returns the field values indexed by field kind (kinds that do not occur
  ## in `instr` stay 0). The `Bits_Literal` slot holds the literal bits of the
  ## format concatenated in stream order.
  ##
  ## Raises `DecodeError` when a literal does not match, or when the format
  ## describes a field that crosses a byte boundary or lies beyond `bytes`.
  var
    byteIndex, bitIndex: int
  for field in instr.fields:
    let width = field.nBits.int
    if width == 0 and field.kind != Bits_Literal:
      # Zero-width fields consume no input bits; they only carry a preset value.
      result[field.kind] = field.value
      continue
    # A field must lie entirely inside one of the provided bytes. Without this
    # check a malformed format produces a negative bit slice (comparing against
    # garbage and possibly matching by accident) or indexes past `bytes`.
    if byteIndex >= bytes.len or bitIndex + width > 8:
      raise newException(DecodeError, "Instruction field does not fit the opcode bytes")
    let bits = bytes[byteIndex].bitsliced(8 - bitIndex - width ..< 8 - bitIndex)
    if field.kind == Bits_Literal:
      # Test if next bits in the byte stream match the op code bits
      if bits != field.value:
        raise newException(DecodeError, "Op code bits do not match")
      # Make room for exactly this literal's bits before appending them.
      result[field.kind] = (result[field.kind] shl width) or field.value
      if VERBOSE:
        echo ">>> ", "opCodeShift ", result[field.kind].int.toBin(field.nBits)
    else:
      result[field.kind] = bits
    bitIndex += width
    if bitIndex >= 8:
      bitIndex = 0
      byteIndex += 1
proc readSignedByte(byteStream: seq[byte], pos: var int): int16 =
  ## Reads one byte at `pos`, sign-extends it to 16 bits and advances `pos`.
  ## Raises `DecodeError` if the stream ends before the byte.
  if pos >= byteStream.len:
    raise newException(DecodeError, "Unexpected end of byte stream inside an instruction")
  result = extendSign(byteStream[pos])
  pos += 1

proc readLittleEndianWord(byteStream: seq[byte], pos: var int): int16 =
  ## Reads two bytes at `pos` (low byte first), combines them into a 16-bit
  ## value and advances `pos`. Raises `DecodeError` if the stream is too short.
  if pos + 1 >= byteStream.len:
    raise newException(DecodeError, "Unexpected end of byte stream inside an instruction")
  result = concatTwoBytes(byteStream[pos], byteStream[pos + 1])
  pos += 2

proc renderRmOperand(mode, rm, w: uint16, dispBits: int16, dispString: string): string =
  ## Renders the operand selected by the MOD and R/M fields: a register for
  ## MOD = 11, otherwise an effective-address expression with the displacement
  ## substituted in.
  case mode
  of 0b00:
    if rm == 0b110:
      # MOD = 00 with R/M = 110 is a direct 16-bit address; show it unsigned.
      let address = cast[uint16](dispBits)
      result = &"[{address}]"
    else:
      result = MOD00[rm]
  of 0b01:
    result = MOD01[rm].replace("D8", dispString)
  of 0b10:
    result = MOD10[rm].replace("D16", dispString)
  of 0b11:
    result = REGISTER[rm][w]
  else:
    doAssert false, "unreachable"

proc disassemble8086MachineCode*(byteStream: seq[byte]): seq[string] =
  ## Decodes `byteStream` into one assembly text line per instruction.
  ##
  ## At each position the formats in `instructions` are tried in order; the
  ## first one whose literal bits match is decoded, its trailing displacement,
  ## immediate or address bytes are consumed, and the rendered line is
  ## appended to the result.
  ##
  ## Raises `DecodeError` when no format matches the byte at the current
  ## position or when the stream ends in the middle of an instruction.
  var instrPointer: int = 0
  # Walk the whole stream, including a final single-byte instruction.
  while instrPointer < byteStream.len:
    let lastIdx = instrPointer
    for instr in instructions:
      let fixedEnd = instrPointer + instr.nBytes.int
      # A format whose fixed bytes run past the end of the stream cannot match.
      if fixedEnd > byteStream.len:
        continue
      let context = byteStream[instrPointer ..< fixedEnd]
      var parsedInstrFields: array[InstrFieldKind, uint16]
      try:
        parsedInstrFields = parseInstructionFields(context, instr)
      except DecodeError:
        continue
      if VERBOSE:
        echo context.mapIt(it.int.toBin(8)).join(" ")
      instrPointer = fixedEnd
      let
        mode = parsedInstrFields[Bits_MOD]
        reg = parsedInstrFields[Bits_REG]
        rm = parsedInstrFields[Bits_RM]
        d = parsedInstrFields[Bits_D] # Instruction SOURCE(d=0) or DESTINATION(d=1) is specified in reg field
        w = parsedInstrFields[Bits_W] # Instruction operates on BYTE(w=0) or WORD(w=1) data
        s = parsedInstrFields[Bits_S] # No sign extension / sign-extend 8-bit immediate data to 16 bits if W=1
      let
        instrFields: seq[InstrFieldKind] = instr.fields.map(x => x.kind)
        hasReg = Bits_REG in instrFields
        hasRm = Bits_RM in instrFields
        hasAddr = parsedInstrFields[Bits_HasAddr] == 0b1
        hasDirectAddr = (mode == 0b00) and (rm == 0b110)
        hasDisp8 = (mode == 0b01)
        hasDisp16 = (mode == 0b10) or hasDirectAddr
        hasData = parsedInstrFields[Bits_HasData] == 0b1
        hasDataW = parsedInstrFields[Bits_HasDataW] == 0b1
        wideData = hasDataW and w == 0b1 and s == 0b0
        hasLabel = parsedInstrFields[Bits_HasLabel] == 0b1
      var
        operand1, operand2, dataString, dispString: string
        dispBits: int16

      # Trailing bytes come in this order: displacement / label, then data,
      # then address.
      if hasDisp8 or hasLabel:
        dispBits = readSignedByte(byteStream, instrPointer)
        dispString = $dispBits
      if hasDisp16:
        dispBits = readLittleEndianWord(byteStream, instrPointer)
        dispString = $dispBits
      if hasData:
        let dataBits =
          if wideData: readLittleEndianWord(byteStream, instrPointer)
          else: readSignedByte(byteStream, instrPointer)
        dataString = $dataBits
      if hasAddr:
        # Addresses are unsigned 16-bit values.
        let address = cast[uint16](readLittleEndianWord(byteStream, instrPointer))
        dataString = &"[{address}]"
      if VERBOSE:
        echo("data: ", dataString)
        echo("disp: ", dispString)

      # memory to/from register/memory
      if hasReg and hasRm:
        if VERBOSE: echo "mem <-> r/m"
        operand1 = REGISTER[reg][w]
        operand2 = renderRmOperand(mode, rm, w, dispBits, dispString)
      # immediate to/from register
      elif hasReg:
        if VERBOSE: echo "imm <-> reg"
        operand1 = REGISTER[reg][w]
        operand2 = dataString
      # immediate to/from register/memory
      elif hasRm:
        if VERBOSE: echo "imm <-> r/m"
        operand1 = dataString
        operand2 = renderRmOperand(mode, rm, w, dispBits, dispString)
        # A memory operand has no implied width, so spell it out.
        if mode != 0b11:
          if w == 0b0:
            operand2 = &"byte {operand2}"
          else:
            operand2 = &"word {operand2}"
      # memory to/from accumulator
      else:
        if VERBOSE: echo "mem -> acc"
        operand1 = if w == 0b0: "AL" else: "AX"
        operand2 = dataString

      # Direction
      if d == 0b1:
        swap(operand1, operand2)

      let x86Instruction =
        if hasLabel:
          replace(&"{instr.kind} $ + 2 + {dispString}", "+ -", "- ")
        elif operand1.len == 0 or operand2.len == 0:
          # Single-operand instruction (e.g. push): print only the operand
          # that is present, without a separating comma.
          formatInstruction(&"{instr.kind} {operand2}{operand1}")
        else:
          formatInstruction(&"{instr.kind} {operand2}, {operand1}")
      result.add(x86Instruction)
      if VERBOSE:
        echo(x86Instruction & "\n")
      break
    # No format consumed any bytes: the opcode is unknown.
    if instrPointer == lastIdx:
      let nextByte = byteStream[instrPointer].int.toBin(8)
      raise newException(DecodeError, &"Failed to detect OP Code in {nextByte}")
when isMainModule:
  # When run as a program, pass each listing file name to
  # `test_part1_listing` from the project-local `utils` module, printing the
  # file name before and an empty line after each run.
  from utils import test_part1_listing

  const listingFiles = [
    "listing_0037_single_register_mov.asm",
    "listing_0038_many_register_mov.asm",
    "listing_0039_more_movs.asm",
    "listing_0040_challenge_movs.asm",
    "listing_0041_add_sub_cmp_jnz.asm",
  ]
  for fname in listingFiles:
    echo fname
    test_part1_listing(fname, verbose=false)
    echo ""