-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwp_shrink4.alpha
152 lines (129 loc) · 3.37 KB
/
wp_shrink4.alpha
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
/* wp_shrink4.alpha: DEC assembler version of wp_shrink4.c */
/* coded for the alpha 21064 */
#include "asdef.alpha.h"
/*
 * Direction names.  The values pair every "up" name with a "down" name so
 * that the two always add up to 7 (XUP+XDOWN, YUP+YDOWN, ZUP+ZDOWN,
 * TUP+TDOWN).  No instruction in this file references these macros; they
 * only match the direction names used in the comments of wp_shrink_4dir.
 * NOTE(review): presumably kept in step with the numbering used by the C
 * sources -- confirm against the C headers.
 */
#define XUP 0
#define YUP 1
#define ZUP 2
#define TUP 3
#define TDOWN 4
#define ZDOWN 5
#define YDOWN 6
#define XDOWN 7
/*
 * Nominal values of the sign argument.  wp_shrink_4dir does not compare
 * against these macros: it branches on "sign < 0", so any negative value
 * takes the MINUS path and every other value takes the PLUS path.
 */
#define PLUS +1
#define MINUS -1
/*
 * wp_shrink_4dir(src, dest0, dest1, dest2, dest3, sign)
 *
 * From one source wilson_vector, fill four destination half_wilson_vectors
 * (X, Y, Z and T, in that argument order).  If sign is negative the four
 * DOWN combinations are written, otherwise the four UP combinations.
 *
 * Layout as used by the offsets below (all values are single precision,
 * accessed with lds/sts):
 *   source:      complex entries d0..d3 of one color are 24 bytes apart,
 *                real part first, imaginary part 4 bytes later
 *   destination: two complex entries h0, h1 per color, 24 bytes apart,
 *                same real/imaginary ordering
 *   colors:      3 of them, 8 bytes apart in source and destinations
 *
 * Per color, writing i for the imaginary unit:
 *              UP (sign >= 0)              DOWN (sign < 0)
 *   dest0 (X)  h0 = d0 + i*d3              h0 = d0 - i*d3
 *              h1 = d1 + i*d2              h1 = d1 - i*d2
 *   dest1 (Y)  h0 = d0 - d3                h0 = d0 + d3
 *              h1 = d1 + d2                h1 = d1 - d2
 *   dest2 (Z)  h0 = d0 + i*d2              h0 = d0 - i*d2
 *              h1 = d1 - i*d3              h1 = d1 + i*d3
 *   dest3 (T)  h0 = d0 + d2                h0 = d0 - d2
 *              h1 = d1 + d3                h1 = d1 - d3
 *
 * Leaf routine: makes no calls and does not touch the stack.
 * Registers written: t1, t2, ft0-ft7, ft9-ft12.  The five pointer
 * arguments (a0-a4) are advanced in place and end up 24 bytes past their
 * entry values.
 */

/* byte offsets inside the source, relative to the current color */
#define WV_D0_RE 0
#define WV_D0_IM 4
#define WV_D1_RE 24
#define WV_D1_IM 28
#define WV_D2_RE 48
#define WV_D2_IM 52
#define WV_D3_RE 72
#define WV_D3_IM 76

/* byte offsets inside each destination, relative to the current color */
#define HWV_H0_RE 0
#define HWV_H0_IM 4
#define HWV_H1_RE 24
#define HWV_H1_IM 28

#define SHRINK_COLOR_STRIDE 8	/* bytes from one color to the next */
#define SHRINK_NCOLORS 3	/* loop trip count */

/* integer registers */
#define src	a0	/* source wilson_vector, current color */
#define dest0	a1	/* X-direction half_wilson_vector, current color */
#define dest1	a2	/* Y-direction half_wilson_vector, current color */
#define dest2	a3	/* Z-direction half_wilson_vector, current color */
#define dest3	a4	/* T-direction half_wilson_vector, current color */
#define sign	a5	/* negative selects the DOWN set, anything else UP */
#define cidx	t1	/* number of colors already processed */
#define again	t2	/* nonzero while cidx < SHRINK_NCOLORS */

/* floating-point registers: the eight inputs of the current color */
#define d0re	ft0
#define d0im	ft1
#define d1re	ft2
#define d1im	ft3
#define d2re	ft4
#define d2im	ft5
#define d3re	ft6
#define d3im	ft7

/* floating-point registers: the four outputs, reused for every direction */
#define h0re	ft9
#define h0im	ft10
#define h1re	ft11
#define h1im	ft12

	.globl	wp_shrink_4dir
	.ent	wp_shrink_4dir 2

wp_shrink_4dir:
	bis	zero,zero,cidx		/* cidx = 0 */
	blt	sign,shrink_down_loop	/* sign < 0: DOWN combinations */

/* ---------------- sign >= 0: XUP, YUP, ZUP, TUP ---------------- */
shrink_up_loop:
	/* read the whole color before any result is stored */
	lds	d0re,WV_D0_RE(src)
	lds	d0im,WV_D0_IM(src)
	lds	d1re,WV_D1_RE(src)
	lds	d1im,WV_D1_IM(src)
	lds	d2re,WV_D2_RE(src)
	lds	d2im,WV_D2_IM(src)
	lds	d3re,WV_D3_RE(src)
	lds	d3im,WV_D3_IM(src)

	/* XUP: h0 = d0 + i*d3, h1 = d1 + i*d2 */
	subs	d0re,d3im,h0re
	adds	d0im,d3re,h0im
	subs	d1re,d2im,h1re
	adds	d1im,d2re,h1im
	sts	h0re,HWV_H0_RE(dest0)
	sts	h0im,HWV_H0_IM(dest0)
	sts	h1re,HWV_H1_RE(dest0)
	sts	h1im,HWV_H1_IM(dest0)

	/* YUP: h0 = d0 - d3, h1 = d1 + d2 */
	subs	d0re,d3re,h0re
	subs	d0im,d3im,h0im
	adds	d1re,d2re,h1re
	adds	d1im,d2im,h1im
	sts	h0re,HWV_H0_RE(dest1)
	sts	h0im,HWV_H0_IM(dest1)
	sts	h1re,HWV_H1_RE(dest1)
	sts	h1im,HWV_H1_IM(dest1)

	/* ZUP: h0 = d0 + i*d2, h1 = d1 - i*d3 */
	subs	d0re,d2im,h0re
	adds	d0im,d2re,h0im
	adds	d1re,d3im,h1re
	subs	d1im,d3re,h1im
	sts	h0re,HWV_H0_RE(dest2)
	sts	h0im,HWV_H0_IM(dest2)
	sts	h1re,HWV_H1_RE(dest2)
	sts	h1im,HWV_H1_IM(dest2)

	/* TUP: h0 = d0 + d2, h1 = d1 + d3 */
	adds	d0re,d2re,h0re
	adds	d0im,d2im,h0im
	adds	d1re,d3re,h1re
	adds	d1im,d3im,h1im
	sts	h0re,HWV_H0_RE(dest3)
	sts	h0im,HWV_H0_IM(dest3)
	sts	h1re,HWV_H1_RE(dest3)
	sts	h1im,HWV_H1_IM(dest3)

	/* step every pointer to the next color and loop while cidx < 3 */
	addq	cidx,1,cidx
	addq	src,SHRINK_COLOR_STRIDE,src
	addq	dest0,SHRINK_COLOR_STRIDE,dest0
	addq	dest1,SHRINK_COLOR_STRIDE,dest1
	addq	dest2,SHRINK_COLOR_STRIDE,dest2
	addq	dest3,SHRINK_COLOR_STRIDE,dest3
	cmplt	cidx,SHRINK_NCOLORS,again
	bne	again,shrink_up_loop
	ret	zero,(ra)

/* ---------------- sign < 0: XDOWN, YDOWN, ZDOWN, TDOWN ---------------- */
shrink_down_loop:
	/* read the whole color before any result is stored */
	lds	d0re,WV_D0_RE(src)
	lds	d0im,WV_D0_IM(src)
	lds	d1re,WV_D1_RE(src)
	lds	d1im,WV_D1_IM(src)
	lds	d2re,WV_D2_RE(src)
	lds	d2im,WV_D2_IM(src)
	lds	d3re,WV_D3_RE(src)
	lds	d3im,WV_D3_IM(src)

	/* XDOWN: h0 = d0 - i*d3, h1 = d1 - i*d2 */
	adds	d0re,d3im,h0re
	subs	d0im,d3re,h0im
	adds	d1re,d2im,h1re
	subs	d1im,d2re,h1im
	sts	h0re,HWV_H0_RE(dest0)
	sts	h0im,HWV_H0_IM(dest0)
	sts	h1re,HWV_H1_RE(dest0)
	sts	h1im,HWV_H1_IM(dest0)

	/* YDOWN: h0 = d0 + d3, h1 = d1 - d2 */
	adds	d0re,d3re,h0re
	adds	d0im,d3im,h0im
	subs	d1re,d2re,h1re
	subs	d1im,d2im,h1im
	sts	h0re,HWV_H0_RE(dest1)
	sts	h0im,HWV_H0_IM(dest1)
	sts	h1re,HWV_H1_RE(dest1)
	sts	h1im,HWV_H1_IM(dest1)

	/* ZDOWN: h0 = d0 - i*d2, h1 = d1 + i*d3 */
	adds	d0re,d2im,h0re
	subs	d0im,d2re,h0im
	subs	d1re,d3im,h1re
	adds	d1im,d3re,h1im
	sts	h0re,HWV_H0_RE(dest2)
	sts	h0im,HWV_H0_IM(dest2)
	sts	h1re,HWV_H1_RE(dest2)
	sts	h1im,HWV_H1_IM(dest2)

	/* TDOWN: h0 = d0 - d2, h1 = d1 - d3 */
	subs	d0re,d2re,h0re
	subs	d0im,d2im,h0im
	subs	d1re,d3re,h1re
	subs	d1im,d3im,h1im
	sts	h0re,HWV_H0_RE(dest3)
	sts	h0im,HWV_H0_IM(dest3)
	sts	h1re,HWV_H1_RE(dest3)
	sts	h1im,HWV_H1_IM(dest3)

	/* step every pointer to the next color and loop while cidx < 3 */
	addq	cidx,1,cidx
	addq	src,SHRINK_COLOR_STRIDE,src
	addq	dest0,SHRINK_COLOR_STRIDE,dest0
	addq	dest1,SHRINK_COLOR_STRIDE,dest1
	addq	dest2,SHRINK_COLOR_STRIDE,dest2
	addq	dest3,SHRINK_COLOR_STRIDE,dest3
	cmplt	cidx,SHRINK_NCOLORS,again
	bne	again,shrink_down_loop
	ret	zero,(ra)

	.end	wp_shrink_4dir